nnetsauce
# nnetsauce public API: re-export estimator classes from their submodules.
# (Fixed: LazyDeepClassifier / LazyDeepRegressor were each imported twice.)
from .base.base import Base
from .base.baseRegressor import BaseRegressor
from .boosting.adaBoostClassifier import AdaBoostClassifier
from .custom.customClassifier import CustomClassifier
from .custom.customRegressor import CustomRegressor
from .custom.customBackpropRegressor import CustomBackPropRegressor
from .datasets import Downloader
from .deep.deepClassifier import DeepClassifier
from .deep.deepRegressor import DeepRegressor
from .deep.deepMTS import DeepMTS
from .elasticnet2.enet2 import ElasticNet2Regressor
from .glm.glmClassifier import GLMClassifier
from .glm.glmRegressor import GLMRegressor
from .kernel.kernel import KernelRidge
from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier
from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor
from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS
from .mts.mts import MTS
from .mts.mlarch import MLARCH
from .mts.classical import ClassicalMTS
from .mts.stackedmts import MTSStacker
from .mts.multioutputmts import MultiOutputMTS
from .mts.discretetokenmts import DiscreteTokenMTS
from .multitask.multitaskClassifier import MultitaskClassifier
from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier
from .neuralnet.neuralnetregression import NeuralNetRegressor
from .neuralnet.neuralnetclassification import NeuralNetClassifier
from .optimizers.optimizer import Optimizer
from .predictioninterval import PredictionInterval
from .predictionset import PredictionSet
from .quantile.quantileregression import QuantileRegressor
from .quantile.quantileclassification import QuantileClassifier
from .randombag.randomBagClassifier import RandomBagClassifier
from .randombag.randomBagRegressor import RandomBagRegressor
from .randomfourier.randomfourier import RandomFourierEstimator
from .rff.rffridge import (
    RandomFourierFeaturesRidge,
    RandomFourierFeaturesRidgeGCV,
)
from .ridge.ridge import RidgeRegressor
from .ridge2.ridge2Classifier import Ridge2Classifier
from .ridge2.ridge2Regressor import Ridge2Regressor
from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier
from .ridge2.ridge2MTSJAX import Ridge2Forecaster
from .ridge2.ridge2multioutputregressor import Ridge2MultiOutputRegressor
from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor
from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor
from .sampling import SubSampler
from .updater import RegressorUpdater, ClassifierUpdater
from .votingregressor import MedianVotingRegressor

# Explicit public API (unchanged).
__all__ = [
    "AdaBoostClassifier",
    "Base",
    "BaseRegressor",
    "BayesianRVFLRegressor",
    "BayesianRVFL2Regressor",
    "ClassicalMTS",
    "CustomClassifier",
    "CustomRegressor",
    "CustomBackPropRegressor",
    "DeepClassifier",
    "DeepRegressor",
    "DeepMTS",
    "DiscreteTokenMTS",
    "Downloader",
    "ElasticNet2Regressor",
    "GLMClassifier",
    "GLMRegressor",
    "KernelRidge",
    "LazyClassifier",
    "LazyRegressor",
    "LazyDeepClassifier",
    "LazyDeepRegressor",
    "LazyMTS",
    "LazyDeepMTS",
    "MLARCH",
    "MedianVotingRegressor",
    "MTS",
    "MTSStacker",
    "MultiOutputMTS",
    "MultitaskClassifier",
    "NeuralNetRegressor",
    "NeuralNetClassifier",
    "PredictionInterval",
    "PredictionSet",
    "SimpleMultitaskClassifier",
    "Optimizer",
    "QuantileRegressor",
    "QuantileClassifier",
    "RandomBagRegressor",
    "RandomBagClassifier",
    "RandomFourierEstimator",
    "RandomFourierFeaturesRidge",
    "RandomFourierFeaturesRidgeGCV",
    "RegressorUpdater",
    "ClassifierUpdater",
    "RidgeRegressor",
    "Ridge2Regressor",
    "Ridge2MultiOutputRegressor",
    "Ridge2Classifier",
    "Ridge2MultitaskClassifier",
    "Ridge2Forecaster",
    "SubSampler",
]
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls the compromise between l1 and l2 norm of weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed = 123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----
    # NOTE(review): this class attribute is shadowed by the `_estimator_type`
    # property defined at the bottom of the class (the property wins, and
    # returns the same value) — confirm whether both are intentional.
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        # AdaBoost-specific settings kept on this object; everything else is
        # delegated to the Boosting base class below.
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # Ensemble state filled in by fit(): boosting coefficients and the
        # fitted base learners, keyed by iteration index.
        # NOTE(review): fit() appends to alpha_ without resetting it, so
        # calling fit() more than once accumulates stale coefficients —
        # consider resetting these at the top of fit().
        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        # mx.is_factor presumably checks that y is integer-coded — helper
        # from the project's matrix-ops utilities (TODO confirm).
        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        # Both regularization hyperparameters must lie in [0, 1].
        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

        # training
        n, p = X.shape
        self.n_classes = len(np.unique(y))
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Observation weights: uniform unless the caller supplies them.
        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            w_m = np.asarray(sample_weight)

        # One CustomClassifier instance is re-fitted (with a fresh seed) at
        # every boosting round; snapshots are stored via deepcopy below.
        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.verbose == 1:
            pbar = Progbar(self.n_estimators)

        if self.method == "SAMME":
            # Discrete SAMME: reweight misclassified points at each round.
            err_m = 1e6  # placeholder; overwritten in the first iteration
            err_bound = 1 - 1 / self.n_classes
            # NOTE(review): this leading 1.0 shifts alpha_ by one relative to
            # base_learners_ (predict_proba uses alpha_[idx] for learner idx)
            # — confirm this offset is intended.
            self.alpha_.append(1.0)
            x_range_n = range(n)

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict(X)

                self.base_learners_.update({m: deepcopy(base_learner)})

                # Indicator of misclassification per observation.
                cond = [y[i] != preds[i] for i in x_range_n]

                # Weighted error, floored at machine epsilon so log() below
                # never receives 0.
                err_m = max(
                    sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                    2.220446049250313e-16,
                )  # sum(w_m) == 1

                # Optional elastic-net-style penalty on the weight vector.
                if self.reg_lambda > 0:
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                        + self.reg_alpha * sum([abs(x) for x in w_m])
                    )

                err_m = min(err_m, err_bound)

                # SAMME coefficient (multi-class AdaBoost), damped by the
                # learning rate.
                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                # Up-weight misclassified points, then renormalize to sum 1.
                w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

                sum_w_m = sum(w_m_temp)

                w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

                # Change the seed so each round draws a different subsample.
                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

        if self.method == "SAMME.R":
            # Real SAMME.R: reweight using predicted class probabilities.
            Y = mo.one_hot_encode2(y, self.n_classes)

            if sample_weight is None:
                w_m = np.repeat(1.0 / n, n)  # (N, 1)

            else:
                w_m = np.asarray(sample_weight)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict_proba(X)

                # In-place clip away 0 probabilities before taking logs.
                np.clip(
                    a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
                )

                self.base_learners_.update({m: deepcopy(base_learner)})

                # SAMME.R weight update: w *= exp(-lr * (1 - 1/K) * y·log(p)).
                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

            if self.verbose == 1:
                pbar.update(self.n_estimators)

            self.n_estimators = len(self.base_learners_)
            self.classes_ = np.unique(y)

            return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
        """
        # Class with the highest aggregated probability wins.
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        n_iter = len(self.base_learners_)

        if self.method == "SAMME":
            # Weighted vote: sum alpha-weighted one-hot predictions of all
            # base learners.
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            # if self.verbose == 1:
            #    pbar = Progbar(n_iter)

            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )

            # if self.verbose == 1:
            #    pbar.update(idx)

            # if self.verbose == 1:
            #    pbar.update(n_iter)

            # Squash scores through the logistic function, then normalize
            # rows to sum to 1.
            expit_ensemble_learner = expit(ensemble_learner)

            sum_ensemble = expit_ensemble_learner.sum(axis=1)

            return expit_ensemble_learner / sum_ensemble[:, None]

        # if self.method == "SAMME.R":
        # SAMME.R path (falls through when method != "SAMME"): average the
        # centered log-probabilities of the base learners.
        ensemble_learner = 0

        # if self.verbose == 1:
        #    pbar = Progbar(n_iter)

        for idx, base_learner in self.base_learners_.items():
            probs = base_learner.predict_proba(X, **kwargs)

            # In-place clip away 0 probabilities before taking logs.
            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            # Center log-probs per row (SAMME.R decision function).
            ensemble_learner += (
                log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
            )

        # if self.verbose == 1:
        #    pbar.update(idx)

        ensemble_learner *= self.n_classes - 1

        # if self.verbose == 1:
        #    pbar.update(n_iter)

        # Same squash-and-normalize as the SAMME branch.
        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    @property
    def _estimator_type(self):
        # sklearn compatibility marker (overrides the class attribute of the
        # same name defined above).
        return "classifier"
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
n_hidden_features=int(11.22338867),
direct_link=True,
n_estimators=250, learning_rate=0.01126343,
col_sample=0.72684326, row_sample=0.86429443,
dropout=0.63078613, n_clusters=2,
type_clust="gmm",
verbose=1, seed = 123,
method="SAMME.R")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples] or None
            initial observation weights (uniform when None)

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    # mx.is_factor presumably checks that y is integer-coded — helper
    # from the project's matrix-ops utilities (TODO confirm).
    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    # Both regularization hyperparameters must lie in [0, 1].
    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # FIX: reset ensemble state so that calling fit() more than once does
    # not accumulate stale boosting coefficients / base learners (the
    # previous version only appended to the lists created in __init__,
    # which desynchronized alpha_ from base_learners_ on refit).
    self.alpha_ = []
    self.base_learners_ = dict.fromkeys(range(self.n_estimators))

    # training
    n, p = X.shape
    self.n_classes = len(np.unique(y))
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    # Observation weights: uniform unless the caller supplies them.
    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        w_m = np.asarray(sample_weight)

    # One CustomClassifier is re-fitted (with a fresh seed) at each round.
    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    if self.verbose == 1:
        pbar = Progbar(self.n_estimators)

    if self.method == "SAMME":
        # Discrete SAMME: reweight misclassified points at each round.
        err_m = 1e6  # placeholder; overwritten in the first iteration
        err_bound = 1 - 1 / self.n_classes
        self.alpha_.append(1.0)
        x_range_n = range(n)

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_.update({m: deepcopy(base_learner)})

            # Indicator of misclassification per observation.
            cond = [y[i] != preds[i] for i in x_range_n]

            # Weighted error, floored at machine epsilon so log() below
            # never receives 0.
            err_m = max(
                sum([elt[0] * elt[1] for elt in zip(cond, w_m)]),
                2.220446049250313e-16,
            )  # sum(w_m) == 1

            # Optional elastic-net-style penalty on the weight vector.
            if self.reg_lambda > 0:
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * sum([x**2 for x in w_m])
                    + self.reg_alpha * sum([abs(x) for x in w_m])
                )

            err_m = min(err_m, err_bound)

            # SAMME coefficient, damped by the learning rate.
            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            # Up-weight misclassified points, then renormalize to sum 1.
            w_m_temp = [exp(alpha_m * cond[i]) for i in x_range_n]

            sum_w_m = sum(w_m_temp)

            w_m = np.asarray([w_m_temp[i] / sum_w_m for i in x_range_n])

            # Change the seed so each round draws a different subsample.
            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self

    if self.method == "SAMME.R":
        # Real SAMME.R: reweight using predicted class probabilities.
        Y = mo.one_hot_encode2(y, self.n_classes)

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)  # (N, 1)

        else:
            w_m = np.asarray(sample_weight)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            # In-place clip away 0 probabilities before taking logs.
            np.clip(
                a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs
            )

            self.base_learners_.update({m: deepcopy(base_learner)})

            # SAMME.R update: w *= exp(-lr * (1 - 1/K) * y·log(p)).
            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        self.n_estimators = len(self.base_learners_)
        self.classes_ = np.unique(y)

        return self
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Return the predicted class index for each row of X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}
    """
    # Delegate to predict_proba, then pick the most probable class per row.
    class_probs = self.predict_proba(X, **kwargs)
    return class_probs.argmax(axis=1)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """

    n_iter = len(self.base_learners_)

    if self.method == "SAMME":
        # Weighted vote: sum alpha-weighted one-hot predictions of all
        # base learners.
        ensemble_learner = np.zeros((X.shape[0], self.n_classes))

        # if self.verbose == 1:
        #    pbar = Progbar(n_iter)

        for idx, base_learner in self.base_learners_.items():
            preds = base_learner.predict(X, **kwargs)

            ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                preds, self.n_classes
            )

        # if self.verbose == 1:
        #    pbar.update(idx)

        # if self.verbose == 1:
        #    pbar.update(n_iter)

        # Squash scores through the logistic function (expit — presumably
        # scipy.special.expit; TODO confirm import), then normalize rows.
        expit_ensemble_learner = expit(ensemble_learner)

        sum_ensemble = expit_ensemble_learner.sum(axis=1)

        return expit_ensemble_learner / sum_ensemble[:, None]

    # if self.method == "SAMME.R":
    # SAMME.R path (falls through when method != "SAMME"): average the
    # centered log-probabilities of the base learners.
    ensemble_learner = 0

    # if self.verbose == 1:
    #    pbar = Progbar(n_iter)

    for idx, base_learner in self.base_learners_.items():
        probs = base_learner.predict_proba(X, **kwargs)

        # In-place clip away 0 probabilities before taking logs.
        np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

        log_preds_proba = np.log(probs)

        # Center log-probs per row (SAMME.R decision function).
        ensemble_learner += (
            log_preds_proba - log_preds_proba.mean(axis=1)[:, None]
        )

    # if self.verbose == 1:
    #    pbar.update(idx)

    ensemble_learner *= self.n_classes - 1

    # if self.verbose == 1:
    #    pbar.update(n_iter)

    # Same squash-and-normalize as the SAMME branch.
    expit_ensemble_learner = expit(ensemble_learner)

    sum_ensemble = expit_ensemble_learner.sum(axis=1)

    return expit_ensemble_learner / sum_ensemble[:, None]
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
48class Base(BaseEstimator): 49 """Base model from which all the other classes inherit. 50 51 This class contains the most important data preprocessing/feature engineering methods. 52 53 Parameters: 54 55 n_hidden_features: int 56 number of nodes in the hidden layer 57 58 activation_name: str 59 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 60 61 a: float 62 hyperparameter for 'prelu' or 'elu' activation function 63 64 nodes_sim: str 65 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 66 'uniform' 67 68 bias: boolean 69 indicates if the hidden layer contains a bias term (True) or 70 not (False) 71 72 dropout: float 73 regularization parameter; (random) percentage of nodes dropped out 74 of the training 75 76 direct_link: boolean 77 indicates if the original features are included (True) in model's 78 fitting or not (False) 79 80 n_clusters: int 81 number of clusters for type_clust='kmeans' or type_clust='gmm' 82 clustering (could be 0: no clustering) 83 84 cluster_encode: bool 85 defines how the variable containing clusters is treated (default is one-hot); 86 if `False`, then labels are used, without one-hot encoding 87 88 type_clust: str 89 type of clustering method: currently k-means ('kmeans') or Gaussian 90 Mixture Model ('gmm') 91 92 type_scaling: a tuple of 3 strings 93 scaling methods for inputs, hidden layer, and clustering respectively 94 (and when relevant). 
95 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 96 97 col_sample: float 98 percentage of features randomly chosen for training 99 100 row_sample: float 101 percentage of rows chosen for training, by stratified bootstrapping 102 103 seed: int 104 reproducibility seed for nodes_sim=='uniform', clustering and dropout 105 106 backend: str 107 "cpu" or "gpu" or "tpu" 108 109 """ 110 111 # construct the object ----- 112 113 def __init__( 114 self, 115 n_hidden_features=5, 116 activation_name="relu", 117 a=0.01, 118 nodes_sim="sobol", 119 bias=True, 120 dropout=0, 121 direct_link=True, 122 n_clusters=2, 123 cluster_encode=True, 124 type_clust="kmeans", 125 type_scaling=("std", "std", "std"), 126 col_sample=1, 127 row_sample=1, 128 seed=123, 129 backend="cpu", 130 ): 131 if not JAX_AVAILABLE and backend != "cpu": 132 raise RuntimeError( 133 "JAX is required for this feature. Install with: pip install yourpackage[jax]" 134 ) 135 136 # input checks ----- 137 138 sys_platform = platform.system() 139 140 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 141 warnings.warn( 142 "No GPU/TPU computing on Windows yet, backend set to 'cpu'" 143 ) 144 backend = "cpu" 145 146 assert activation_name in ( 147 "relu", 148 "tanh", 149 "sigmoid", 150 "prelu", 151 "elu", 152 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 153 154 assert nodes_sim in ( 155 "sobol", 156 "hammersley", 157 "uniform", 158 "halton", 159 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 160 161 assert type_clust in ( 162 "kmeans", 163 "gmm", 164 ), "'type_clust' must be in ('kmeans', 'gmm')" 165 166 assert (len(type_scaling) == 3) & all( 167 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 168 for i in range(len(type_scaling)) 169 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), 
robust scaling ('robust') and max absolute ('maxabs')"

        # col_sample is a proportion of columns kept at training time
        assert (col_sample >= 0) & (
            col_sample <= 1
        ), "'col_sample' must be comprised between 0 and 1 (both included)"

        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "must have 'backend' in ('cpu', 'gpu', 'tpu')"

        # hyperparameters (see class docstring for details) -----
        self.n_hidden_features = n_hidden_features  # number of hidden nodes
        self.activation_name = activation_name  # name of activation function
        self.a = a  # hyperparameter for 'prelu'/'elu'
        self.nodes_sim = nodes_sim  # simulation type for hidden-layer nodes
        self.bias = bias  # prepend a bias column to hidden-layer input?
        self.seed = seed  # reproducibility seed
        self.backend = backend  # 'cpu', 'gpu' or 'tpu'
        self.dropout = dropout  # dropout rate on the hidden layer
        self.direct_link = direct_link  # concatenate original features to Phi(X)?
        self.cluster_encode = cluster_encode  # one-hot encode cluster labels?
        self.type_clust = type_clust  # 'kmeans' or 'gmm'
        self.type_scaling = type_scaling  # scalers for (inputs, hidden, clustering)
        self.col_sample = col_sample  # proportion of columns sampled
        self.row_sample = row_sample  # proportion of rows sampled
        self.n_clusters = n_clusters  # 0 (or less) means: no clustering
        # infer the task from the scikit-learn mixin of the concrete subclass
        if isinstance(self, RegressorMixin):
            self.type_fit = "regression"
        elif isinstance(self, ClassifierMixin):
            self.type_fit = "classification"
        # fitted state, populated by cook_training_set & friends -----
        self.subsampler_ = None  # row subsampler (used when row_sample < 1)
        self.index_col_ = None  # indices of sampled columns (col_sample < 1)
        self.index_row_ = True  # indices of sampled rows (row_sample < 1)
        self.clustering_obj_ = None  # fitted clustering model
        self.clustering_scaler_ = None  # scaler fitted before clustering
        self.nn_scaler_ = None  # scaler fitted before the hidden layer
        self.scaler_ = None  # scaler fitted on the final design matrix
        self.encoder_ = None
        self.W_ = None  # hidden-layer weight matrix
        self.X_ = None  # stored training covariates
        self.y_ = None  # stored training response
        self.y_mean_ = None  # mean of the (regression) response
        self.beta_ = None  # fitted coefficients

        # activation function -----
        # dispatch on backend: numpy-based implementations (ac / np) on CPU,
        # JAX implementations (jnn / jnp) on GPU/TPU
        activation_options = {
            "relu": ac.relu if (self.backend == "cpu") else jnn.relu,
            "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh,
            "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid),
            "prelu": partial(ac.prelu, a=a),
            "elu": (
                partial(ac.elu, a=a)
                if (self.backend == "cpu")
                else partial(jnn.elu, a=a)
            ),
        }

        self.activation_func = activation_options[activation_name]

    # "preprocessing" methods to be inherited -----

    def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):  #
        """Create new covariates with kmeans or GMM clustering.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            predict: boolean
                is False on training set and True on test set

            scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
                if scaler has already been fitted on training data (online training), it can be passed here

            **kwargs:
                additional parameters to be passed to the
                clustering method

        Returns:

            Clusters' matrix, one-hot encoded: {array-like}

        """

        np.random.seed(self.seed)

        if X is None:
            X = self.X_

        if isinstance(X, pd.DataFrame):
            X = copy.deepcopy(X.values.astype(float))

        if len(X.shape) == 1:
            # single observation: promote to a 1-row 2D array
            X = X.reshape(1, -1)

        if predict is False:  # encode training set
            # scale input data before clustering (fits self.clustering_scaler_)
            self.clustering_scaler_, scaled_X = mo.scale_covariates(
                X, choice=self.type_scaling[2], scaler=self.clustering_scaler_
            )

            # fits self.clustering_obj_ as a side effect
            self.clustering_obj_, X_clustered = mo.cluster_covariates(
                scaled_X,
                self.n_clusters,
                self.seed,
                type_clust=self.type_clust,
                **kwargs
            )

            if self.cluster_encode:
                return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
                    np.float16
                )

            return X_clustered.astype(np.float16)

        # if predict == True, encode test set with the already-fitted
        # scaler and clustering model
        X_clustered = self.clustering_obj_.predict(
            self.clustering_scaler_.transform(X)
        )

        if self.cluster_encode == True:
            return mo.one_hot_encode(X_clustered, self.n_clusters).astype(
                np.float16
            )

        return X_clustered.astype(np.float16)

    def create_layer(self, scaled_X, W=None):
        """Create hidden layer.

        Parameters:

            scaled_X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            W: {array-like}, shape = [n_features, hidden_features]
                if provided, constructs the hidden layer with W; otherwise computed internally

        Returns:

            Hidden layer matrix: {array-like}

        """

        n_features = scaled_X.shape[1]

        # hash_sim = {
        #     "sobol": generate_sobol,
        #     "hammersley": generate_hammersley,
        #     "uniform": generate_uniform,
        #     "halton": generate_halton
        # }

        if self.bias is False:  # no bias term in the hidden layer
            if W is None:
                # simulate hidden-layer weights; any nodes_sim value other
                # than 'sobol'/'hammersley'/'uniform' falls back to halton
                if self.nodes_sim == "sobol":
                    self.W_ = generate_sobol(
                        n_dims=n_features,
                        n_points=self.n_hidden_features,
                        seed=self.seed,
                    )
                elif self.nodes_sim == "hammersley":
                    self.W_ = generate_hammersley(
                        n_dims=n_features,
                        n_points=self.n_hidden_features,
                        seed=self.seed,
                    )
                elif self.nodes_sim == "uniform":
                    self.W_ = generate_uniform(
                        n_dims=n_features,
                        n_points=self.n_hidden_features,
                        seed=self.seed,
                    )
                else:
                    self.W_ = generate_halton(
                        n_dims=n_features,
                        n_points=self.n_hidden_features,
                        seed=self.seed,
                    )

                assert (
                    scaled_X.shape[1] == self.W_.shape[0]
                ), "check dimensions of covariates X and matrix W"

                return mo.dropout(
                    x=self.activation_func(
                        mo.safe_sparse_dot(
                            a=scaled_X, b=self.W_, backend=self.backend
                        )
                    ),
                    drop_prob=self.dropout,
                    seed=self.seed,
                )

            # W is not none
            assert (
                scaled_X.shape[1] == W.shape[0]
            ), "check dimensions of covariates X and matrix W"

            # self.W_ = W
            return mo.dropout(
                x=self.activation_func(
                    mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend)
                ),
                drop_prob=self.dropout,
                seed=self.seed,
            )

        # with bias term in the hidden layer
        if W is None:
            # one extra weight row for the bias column of ones
            n_features_1 = n_features + 1

            if self.nodes_sim == "sobol":
                self.W_ = generate_sobol(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "hammersley":
                self.W_ = generate_hammersley(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            elif self.nodes_sim == "uniform":
                self.W_ = generate_uniform(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
            else:
                self.W_ = generate_halton(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )

            # self.W_ = hash_sim[self.nodes_sim](
            #     n_dims=n_features_1,
            #     n_points=self.n_hidden_features,
            #     seed=self.seed,
            # )

            return mo.dropout(
                x=self.activation_func(
                    mo.safe_sparse_dot(
                        a=mo.cbind(
                            np.ones(scaled_X.shape[0]),
                            scaled_X,
                            backend=self.backend,
                        ),
                        b=self.W_,
                        backend=self.backend,
                    )
                ),
                drop_prob=self.dropout,
                seed=self.seed,
            )

        # W is not None
        # self.W_ = W
        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(
                    a=mo.cbind(
                        np.ones(scaled_X.shape[0]),
                        scaled_X,
                        backend=self.backend,
                    ),
                    b=W,
                    backend=self.backend,
                )
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    def _jax_create_layer(self, scaled_X, W=None):
        """JAX-compatible version of create_layer that exactly matches the original functionality."""
        key = jax.random.PRNGKey(self.seed)
        n_features = scaled_X.shape[1]

        # Generate weights if not provided
        if W is None:
            if self.bias:
                n_features_1 = n_features + 1
                shape = (n_features_1, self.n_hidden_features)
            else:
                shape = (n_features, self.n_hidden_features)

            # JAX-compatible weight generation matching original behavior
            # NOTE(review): the branches below use n_features_1, which is only
            # bound when self.bias is True — this raises NameError for
            # bias=False with 'sobol'/'hammersley'/halton; likely should use
            # shape[0] instead. TODO confirm and fix.
            if self.nodes_sim == "sobol":
                W_np = generate_sobol(
                    n_dims=n_features_1,
n_points=self.n_hidden_features,
                    seed=self.seed,
                )
                W = jnp.asarray(W_np)
            elif self.nodes_sim == "hammersley":
                W_np = generate_hammersley(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
                W = jnp.asarray(W_np)
            elif self.nodes_sim == "uniform":
                key, subkey = jax.random.split(key)
                W = jax.random.uniform(
                    subkey, shape=shape, minval=-1.0, maxval=1.0
                )
            else:  # halton
                W_np = generate_halton(
                    n_dims=n_features_1,
                    n_points=self.n_hidden_features,
                    seed=self.seed,
                )
                W = jnp.asarray(W_np)

            self.W_ = np.array(W)  # Store as numpy for original methods

        # Prepare input with bias if needed
        if self.bias:
            X_with_bias = jnp.hstack(
                [jnp.ones((scaled_X.shape[0], 1)), scaled_X]
            )
            # NOTE(review): leftover debug prints — consider removing
            print("X_with_bias shape:", X_with_bias.shape)
            print("W shape:", W.shape)
            linear_output = jnp.dot(X_with_bias, W)
        else:
            linear_output = jnp.dot(scaled_X, W)

        # Apply activation function
        # NOTE(review): unlike create_layer, 'prelu'/'elu' are handled here as
        # leaky relu with slope self.a — confirm this is intentional
        if self.activation_name == "relu":
            activated = jax.nn.relu(linear_output)
        elif self.activation_name == "tanh":
            activated = jnp.tanh(linear_output)
        elif self.activation_name == "sigmoid":
            activated = jax.nn.sigmoid(linear_output)
        else:  # leaky relu
            activated = jax.nn.leaky_relu(linear_output, negative_slope=self.a)

        # Apply dropout
        if self.dropout > 0:
            key, subkey = jax.random.split(key)
            mask = jax.random.bernoulli(
                subkey, p=1 - self.dropout, shape=activated.shape
            )
            # inverted dropout: surviving units are rescaled by 1/(1 - dropout)
            activated = jnp.where(mask, activated / (1 - self.dropout), 0)

        return activated

    def cook_training_set(self, y=None, X=None, W=None, **kwargs):
        """Create new hidden features for training set, with hidden layer, center the response.

        Parameters:

            y: array-like, shape = [n_samples]
                Target values

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            W: {array-like}, shape = [n_features, hidden_features]
                if provided, constructs the hidden layer via W

        Returns:

            (centered response, direct link + hidden layer matrix): {tuple}

        """

        # either X and y are stored or not
        # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
        if self.n_hidden_features > 0:  # has a hidden layer
            assert (
                len(self.type_scaling) >= 2
            ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

        if X is None:
            # use the stored training covariates
            if self.col_sample == 1:
                input_X = self.X_
            else:
                # sample a reproducible subset of columns
                n_features = self.X_.shape[1]
                new_n_features = int(np.ceil(n_features * self.col_sample))
                assert (
                    new_n_features >= 1
                ), "check class attribute 'col_sample' and the number of covariates provided for X"
                np.random.seed(self.seed)
                index_col = np.random.choice(
                    range(n_features), size=new_n_features, replace=False
                )
                self.index_col_ = index_col
                input_X = self.X_[:, self.index_col_]

        else:  # X is not None # keep X vs self.X_
            if isinstance(X, pd.DataFrame):
                X = copy.deepcopy(X.values.astype(float))

            if self.col_sample == 1:
                input_X = X
            else:
                n_features = X.shape[1]
                new_n_features = int(np.ceil(n_features * self.col_sample))
                assert (
                    new_n_features >= 1
                ), "check class attribute 'col_sample' and the number of covariates provided for X"
                np.random.seed(self.seed)
                index_col = np.random.choice(
                    range(n_features), size=new_n_features, replace=False
                )
                self.index_col_ = index_col
                input_X = X[:, self.index_col_]

        if self.n_clusters <= 0:
            # data without any clustering: self.n_clusters is None -----

            if self.n_hidden_features > 0:  # with hidden layer
                self.nn_scaler_, scaled_X = mo.scale_covariates(
                    input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
                )
                Phi_X = (
                    self.create_layer(scaled_X)
                    if W is None
                    else self.create_layer(scaled_X, W=W)
                )
                # direct link: concatenate original features to hidden layer
                Z = (
                    mo.cbind(input_X, Phi_X, backend=self.backend)
                    if self.direct_link is True
                    else Phi_X
                )
                # NOTE(review): scaled_Z is computed here but the returns
                # below call self.scaler_.transform(Z) again — scaled_Z is
                # effectively unused; confirm whether this is intentional
                self.scaler_, scaled_Z = mo.scale_covariates(
                    Z, choice=self.type_scaling[0], scaler=self.scaler_
                )
            else:  # no hidden layer
                Z = input_X
                self.scaler_, scaled_Z = mo.scale_covariates(
                    Z, choice=self.type_scaling[0], scaler=self.scaler_
                )

        else:
            # data with clustering: self.n_clusters is not None ----- # keep

            augmented_X = mo.cbind(
                input_X,
                self.encode_clusters(input_X, **kwargs),
                backend=self.backend,
            )

            if self.n_hidden_features > 0:  # with hidden layer
                self.nn_scaler_, scaled_X = mo.scale_covariates(
                    augmented_X,
                    choice=self.type_scaling[1],
                    scaler=self.nn_scaler_,
                )
                Phi_X = (
                    self.create_layer(scaled_X)
                    if W is None
                    else self.create_layer(scaled_X, W=W)
                )
                Z = (
                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
                    if self.direct_link is True
                    else Phi_X
                )
                self.scaler_, scaled_Z = mo.scale_covariates(
                    Z, choice=self.type_scaling[0], scaler=self.scaler_
                )
            else:  # no hidden layer
                Z = augmented_X
                self.scaler_, scaled_Z = mo.scale_covariates(
                    Z, choice=self.type_scaling[0], scaler=self.scaler_
                )

        # Returning model inputs -----
        if mx.is_factor(y) is False:  # regression
            # center y
            if y is None:
                self.y_mean_, centered_y = mo.center_response(self.y_)
            else:
                self.y_mean_, centered_y = mo.center_response(y)

            # y is subsampled
            if self.row_sample < 1:
                n, p = Z.shape

                self.subsampler_ = (
                    SubSampler(
                        y=self.y_, row_sample=self.row_sample, seed=self.seed
                    )
                    if y is None
                    else SubSampler(
                        y=y, row_sample=self.row_sample, seed=self.seed
                    )
                )

                self.index_row_ = self.subsampler_.subsample()

                n_row_sample = len(self.index_row_)
                # regression
                return (
                    centered_y[self.index_row_].reshape(n_row_sample),
                    self.scaler_.transform(
                        Z[self.index_row_, :].reshape(n_row_sample, p)
                    ),
                )
            # y is not subsampled
            # regression
            return (centered_y, self.scaler_.transform(Z))

        # classification
        # y is subsampled
        if self.row_sample < 1:
            n, p = Z.shape

            self.subsampler_ = (
                SubSampler(
                    y=self.y_, row_sample=self.row_sample, seed=self.seed
                )
                if y is None
                else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
            )

            self.index_row_ = self.subsampler_.subsample()

            n_row_sample = len(self.index_row_)
            # classification
            return (
                y[self.index_row_].reshape(n_row_sample),
                self.scaler_.transform(
                    Z[self.index_row_, :].reshape(n_row_sample, p)
                ),
            )
        # y is not subsampled
        # classification
        return (y, self.scaler_.transform(Z))

    def cook_test_set(self, X, **kwargs):
        """Transform data from test set, with hidden layer.
Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            **kwargs: additional parameters to be passed to self.encode_cluster

        Returns:

            Transformed test set : {array-like}
        """

        if isinstance(X, pd.DataFrame):
            X = copy.deepcopy(X.values.astype(float))

        if len(X.shape) == 1:
            # single observation: promote to a 1-row 2D array
            X = X.reshape(1, -1)

        # NOTE(review): this tests n_clusters == 0 while cook_training_set
        # tests n_clusters <= 0 — confirm negative values cannot occur
        if (
            self.n_clusters == 0
        ):  # data without clustering: self.n_clusters is None -----
            if self.n_hidden_features > 0:
                # if hidden layer
                scaled_X = (
                    self.nn_scaler_.transform(X)
                    if (self.col_sample == 1)
                    else self.nn_scaler_.transform(X[:, self.index_col_])
                )
                Phi_X = self.create_layer(scaled_X, self.W_)
                if self.direct_link:
                    # NOTE(review): here the *scaled* X is concatenated to
                    # Phi_X, whereas cook_training_set concatenates the
                    # unscaled input_X — verify train/test consistency
                    return self.scaler_.transform(
                        mo.cbind(scaled_X, Phi_X, backend=self.backend)
                    )
                # when self.direct_link == False
                return self.scaler_.transform(Phi_X)
            # if no hidden layer # self.n_hidden_features == 0
            return self.scaler_.transform(X)

        # data with clustering: self.n_clusters > 0 -----
        if self.col_sample == 1:
            predicted_clusters = self.encode_clusters(
                X=X, predict=True, **kwargs
            )
            augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
        else:
            predicted_clusters = self.encode_clusters(
                X=X[:, self.index_col_], predict=True, **kwargs
            )
            augmented_X = mo.cbind(
                X[:, self.index_col_], predicted_clusters, backend=self.backend
            )

        if self.n_hidden_features > 0:  # if hidden layer
            scaled_X = self.nn_scaler_.transform(augmented_X)
            Phi_X = self.create_layer(scaled_X, self.W_)
            if self.direct_link:
                return self.scaler_.transform(
                    mo.cbind(augmented_X, Phi_X, backend=self.backend)
                )
            return self.scaler_.transform(Phi_X)

        # if no hidden layer
        return self.scaler_.transform(augmented_X)

    def cook_training_set_jax(self, y=None, X=None, W=None, **kwargs):
        """JAX-compatible version of cook_training_set that maintains side effects."""
        # Initialize random key
        key = jax.random.PRNGKey(self.seed)

        # Convert inputs to JAX arrays; fall back to the stored training data
        X = jnp.asarray(X) if X is not None else jnp.asarray(self.X_)
        y = jnp.asarray(y) if y is not None else jnp.asarray(self.y_)

        # Handle column sampling
        if self.col_sample < 1:
            n_features = X.shape[1]
            new_n_features = int(jnp.ceil(n_features * self.col_sample))
            assert new_n_features >= 1, "Invalid col_sample"

            key, subkey = jax.random.split(key)
            index_col = jax.random.choice(
                subkey, n_features, shape=(new_n_features,), replace=False
            )
            self.index_col_ = np.array(
                index_col
            )  # Store as numpy for original methods
            input_X = X[:, index_col]
            n_features = (
                new_n_features  # Update n_features after column sampling
            )
        else:
            input_X = X
            n_features = X.shape[1]

        augmented_X = input_X

        # JAX-compatible scaling (standardization with a small epsilon to
        # avoid division by zero); reuses mean/std when already fitted
        def jax_scale(data, mean=None, std=None):
            if mean is None:
                mean = jnp.mean(data, axis=0)
            if std is None:
                std = jnp.std(data, axis=0)
            return (data - mean) / (std + 1e-10), mean, std

        # Hidden layer processing
        if self.n_hidden_features > 0:
            # Initialize weights if not provided
            if W is None:
                shape = (n_features, self.n_hidden_features)

                # JAX-compatible weight generation
                if self.nodes_sim == "uniform":
                    key, subkey = jax.random.split(key)
                    W = jax.random.uniform(
                        subkey, shape=shape, minval=-1.0, maxval=1.0
                    ) * (1 / jnp.sqrt(n_features))
                else:
                    # For other sequences, use numpy generation then convert to JAX
                    if self.nodes_sim == "sobol":
                        W_np = generate_sobol(
                            n_dims=shape[0],
                            n_points=shape[1],
                            seed=self.seed,
                        )
                    elif self.nodes_sim == "hammersley":
                        W_np = generate_hammersley(
                            n_dims=shape[0],
                            n_points=shape[1],
                            seed=self.seed,
                        )
                    elif self.nodes_sim == "halton":
                        W_np = generate_halton(
                            n_dims=shape[0],
                            n_points=shape[1],
                            seed=self.seed,
                        )
                    else:  # default to uniform
                        key, subkey = jax.random.split(key)
                        W = jax.random.uniform(
                            subkey, shape=shape, minval=-1.0, maxval=1.0
                        ) * (1 / jnp.sqrt(n_features))

                if self.nodes_sim in ["sobol", "hammersley", "halton"]:
                    W = jnp.asarray(W_np) * (1 / jnp.sqrt(n_features))

                self.W_ = np.array(W)  # Store as numpy for original methods

            # Scale features
            scaled_X, self.nn_mean_, self.nn_std_ = jax_scale(
                augmented_X,
                getattr(self, "nn_mean_", None),
                getattr(self, "nn_std_", None),
            )

            # Create hidden layer with proper bias handling
            linear_output = jnp.dot(scaled_X, W)

            # Apply activation
            if self.activation_name == "relu":
                Phi_X = jax.nn.relu(linear_output)
            elif self.activation_name == "tanh":
                Phi_X = jnp.tanh(linear_output)
            elif self.activation_name == "sigmoid":
                Phi_X = jax.nn.sigmoid(linear_output)
            else:  # leaky relu
                Phi_X = jax.nn.leaky_relu(linear_output, negative_slope=self.a)

            # Apply dropout (inverted: kept units rescaled by 1/(1 - dropout))
            if self.dropout > 0:
                key, subkey = jax.random.split(key)
                mask = jax.random.bernoulli(
                    subkey, p=1 - self.dropout, shape=Phi_X.shape
                )
                Phi_X = jnp.where(mask, Phi_X / (1 - self.dropout), 0)

            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
        else:
            Z = augmented_X

        # Final scaling
        scaled_Z, self.scale_mean_, self.scale_std_ = jax_scale(
            Z,
            getattr(self, "scale_mean_", None),
            getattr(self, "scale_std_", None),
        )

        # Center response for regression
        if not hasattr(mx, "is_factor") or not mx.is_factor(
            y
        ):  # regression case
            self.y_mean_ = float(
                jnp.mean(y)
            )  # Convert to Python float for compatibility
            centered_y = y - self.y_mean_
        else:
            centered_y = y

        # Handle row sampling
        # NOTE(review): unlike cook_training_set (stratified SubSampler),
        # this samples rows uniformly at random — confirm intended
        if self.row_sample < 1:
            key, subkey = jax.random.split(key)
            n_samples = Z.shape[0]
            n_row_sample = int(jnp.ceil(n_samples * self.row_sample))
            index_row = jax.random.choice(
                subkey, n_samples, shape=(n_row_sample,), replace=False
            )
            self.index_row_ = np.array(
                index_row
            )  # Store as numpy for original methods
            return (centered_y[index_row], scaled_Z[index_row])

        return (centered_y, scaled_Z)

    def cook_test_set_jax(self, X, **kwargs):
        """JAX-compatible test set processing with matching dimension handling."""
        X = jnp.asarray(X)

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        # Handle column sampling
        input_X = (
            X if self.col_sample == 1 else X[:, jnp.asarray(self.index_col_)]
        )

        augmented_X = input_X

        # JAX-compatible scaling (mean/std fitted by cook_training_set_jax)
        scaled_X = (augmented_X - self.nn_mean_) / (self.nn_std_ + 1e-10)

        # Process hidden layer if needed
        if self.n_hidden_features > 0:
            Phi_X = self._jax_create_layer(scaled_X, jnp.asarray(self.W_))
            Z = jnp.hstack([scaled_X, Phi_X]) if self.direct_link else Phi_X
        else:
            Z = augmented_X

        # Final scaling
        scaled_Z = (Z - self.scale_mean_) / (self.scale_std_ + 1e-10)

        return scaled_Z

    def _jax_create_layer(self, X, W):
        """JAX-compatible hidden layer creation.

        NOTE(review): this redefinition shadows the earlier
        _jax_create_layer(self, scaled_X, W=None) defined above in the class;
        only this version (no bias handling, no dropout) is effective.
        """
        # print("X", X.shape)
        # print("W", W.shape)
        # print("self.W_", self.W_.shape)
        linear_output = jnp.dot(X, W)

        if self.activation_name == "relu":
            return jax.nn.relu(linear_output)
        elif self.activation_name == "tanh":
            return jnp.tanh(linear_output)
        elif self.activation_name == "sigmoid":
            return jax.nn.sigmoid(linear_output)
        else:  # leaky relu
            return jax.nn.leaky_relu(linear_output, negative_slope=self.a)

    def cross_val_score(
        self,
        X,
        y,
        cv=5,
        scoring="accuracy",
        random_state=42,
        n_jobs=-1,
        epsilon=0.5,
        penalized=True,
objective="abs", 984 **kwargs 985 ): 986 """ 987 Penalized Cross-validation score for a model. 988 989 Parameters: 990 991 X: {array-like}, shape = [n_samples, n_features] 992 Training vectors, where n_samples is the number 993 of samples and n_features is the number of features 994 995 y: array-like, shape = [n_samples] 996 Target values 997 998 X_test: {array-like}, shape = [n_samples, n_features] 999 Test vectors, where n_samples is the number 1000 of samples and n_features is the number of features 1001 1002 y_test: array-like, shape = [n_samples] 1003 Target values 1004 1005 cv: int 1006 Number of folds 1007 1008 scoring: str 1009 Scoring metric 1010 1011 random_state: int 1012 Random state 1013 1014 n_jobs: int 1015 Number of jobs to run in parallel 1016 1017 epsilon: float 1018 Penalty parameter 1019 1020 penalized: bool 1021 Whether to obtain penalized cross-validation score or not 1022 1023 objective: str 1024 'abs': Minimize the absolute difference between cross-validation score and validation score 1025 'relative': Minimize the relative difference between cross-validation score and validation score 1026 Returns: 1027 1028 A namedtuple with the following fields: 1029 - cv_score: float 1030 cross-validation score 1031 - val_score: float 1032 validation score 1033 - penalized_score: float 1034 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 1035 If higher scoring metric is better, minimize the function result. 1036 If lower scoring metric is better, maximize the function result. 
1037 """ 1038 if scoring == "accuracy": 1039 scoring_func = accuracy_score 1040 elif scoring == "balanced_accuracy": 1041 scoring_func = balanced_accuracy_score 1042 elif scoring == "f1": 1043 scoring_func = f1_score 1044 elif scoring == "roc_auc": 1045 scoring_func = roc_auc_score 1046 elif scoring == "r2": 1047 scoring_func = r2_score 1048 elif scoring == "mse": 1049 scoring_func = mean_squared_error 1050 elif scoring == "mae": 1051 scoring_func = mean_absolute_error 1052 elif scoring == "mape": 1053 scoring_func = mean_absolute_percentage_error 1054 elif scoring == "rmse": 1055 1056 def scoring_func(y_true, y_pred): 1057 return np.sqrt(mean_squared_error(y_true, y_pred)) 1058 1059 X_train, X_val, y_train, y_val = train_test_split( 1060 X, y, test_size=0.2, random_state=random_state 1061 ) 1062 1063 res = cross_val_score( 1064 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 1065 ) # cross-validation error 1066 1067 if penalized == False: 1068 return res 1069 1070 DescribeResult = namedtuple( 1071 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 1072 ) 1073 1074 numerator = res.mean() 1075 1076 # Evaluate on the (cv+1)-th fold 1077 preds_val = self.fit(X_train, y_train).predict(X_val) 1078 try: 1079 denominator = scoring(y_val, preds_val) # validation error 1080 except Exception as e: 1081 denominator = scoring_func(y_val, preds_val) 1082 1083 # if higher is better 1084 if objective == "abs": 1085 penalized_score = np.abs(numerator - denominator) + epsilon * ( 1086 1 / denominator + 1 / numerator 1087 ) 1088 elif objective == "relative": 1089 ratio = numerator / denominator 1090 penalized_score = np.abs(ratio - 1) + epsilon * ( 1091 1 / denominator + 1 / numerator 1092 ) 1093 1094 return DescribeResult( 1095 cv_score=numerator, 1096 val_score=denominator, 1097 penalized_score=penalized_score, 1098 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(the third is only used when clustering is enabled).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim == 'uniform', clustering, and dropout
backend: str
"cpu" or "gpu" or "tpu"
232 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 233 """Create new covariates with kmeans or GMM clustering 234 235 Parameters: 236 237 X: {array-like}, shape = [n_samples, n_features] 238 Training vectors, where n_samples is the number 239 of samples and n_features is the number of features. 240 241 predict: boolean 242 is False on training set and True on test set 243 244 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 245 if scaler has already been fitted on training data (online training), it can be passed here 246 247 **kwargs: 248 additional parameters to be passed to the 249 clustering method 250 251 Returns: 252 253 Clusters' matrix, one-hot encoded: {array-like} 254 255 """ 256 257 np.random.seed(self.seed) 258 259 if X is None: 260 X = self.X_ 261 262 if isinstance(X, pd.DataFrame): 263 X = copy.deepcopy(X.values.astype(float)) 264 265 if len(X.shape) == 1: 266 X = X.reshape(1, -1) 267 268 if predict is False: # encode training set 269 # scale input data before clustering 270 self.clustering_scaler_, scaled_X = mo.scale_covariates( 271 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 272 ) 273 274 self.clustering_obj_, X_clustered = mo.cluster_covariates( 275 scaled_X, 276 self.n_clusters, 277 self.seed, 278 type_clust=self.type_clust, 279 **kwargs 280 ) 281 282 if self.cluster_encode: 283 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 284 np.float16 285 ) 286 287 return X_clustered.astype(np.float16) 288 289 # if predict == True, encode test set 290 X_clustered = self.clustering_obj_.predict( 291 self.clustering_scaler_.transform(X) 292 ) 293 294 if self.cluster_encode == True: 295 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 296 np.float16 297 ) 298 299 return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded: {array-like}
def create_layer(self, scaled_X, W=None):
    """Create hidden layer.

    Parameters:

        scaled_X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer with W; otherwise computed internally

    Returns:

        Hidden layer matrix: {array-like}

    """

    n_features = scaled_X.shape[1]

    # quasi-random generators for the hidden-layer weights; any other
    # nodes_sim value falls back to halton (same as the original if/elif
    # chain's final else)
    node_generators = {
        "sobol": generate_sobol,
        "hammersley": generate_hammersley,
        "uniform": generate_uniform,
    }

    def _simulate_weights(n_dims):
        generator = node_generators.get(self.nodes_sim, generate_halton)
        return generator(
            n_dims=n_dims,
            n_points=self.n_hidden_features,
            seed=self.seed,
        )

    def _hidden_layer(layer_input, weights):
        # activation(layer_input @ weights), then dropout regularization
        return mo.dropout(
            x=self.activation_func(
                mo.safe_sparse_dot(
                    a=layer_input, b=weights, backend=self.backend
                )
            ),
            drop_prob=self.dropout,
            seed=self.seed,
        )

    if self.bias is False:  # no bias term in the hidden layer
        if W is None:
            self.W_ = _simulate_weights(n_features)
            assert (
                scaled_X.shape[1] == self.W_.shape[0]
            ), "check dimensions of covariates X and matrix W"
            return _hidden_layer(scaled_X, self.W_)

        # W was provided by the caller
        assert (
            scaled_X.shape[1] == W.shape[0]
        ), "check dimensions of covariates X and matrix W"
        return _hidden_layer(scaled_X, W)

    # with bias term in the hidden layer: prepend a column of ones,
    # and simulate one extra weight row to match it
    layer_input = mo.cbind(
        np.ones(scaled_X.shape[0]),
        scaled_X,
        backend=self.backend,
    )

    if W is None:
        self.W_ = _simulate_weights(n_features + 1)
        return _hidden_layer(layer_input, self.W_)

    return _hidden_layer(layer_input, W)
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for training set, with hidden layer, center the response.

    Builds the model's design matrix in up to three stages: optional column
    subsampling (``col_sample``), optional cluster encoding (``n_clusters``),
    and optional random hidden layer (``n_hidden_features``), then scales the
    result and centers the response (regression only). Fitted scalers and
    sampled indices are stored on ``self`` for reuse by ``cook_test_set``.

    Parameters:

        y: array-like, shape = [n_samples]
            Target values (``None`` means use the stored ``self.y_``)

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features
            (``None`` means use the stored ``self.X_``)

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer via W

    Returns:

        (centered response, direct link + hidden layer matrix): {tuple}
        For classification (``mx.is_factor(y)`` is True) the response is
        returned uncentered.
    """

    # either X and y are stored or not
    # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None))
    if self.n_hidden_features > 0:  # has a hidden layer
        # scaling[1] is needed for the hidden-layer input below
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    # --- 1) choose input matrix, with optional column subsampling -----
    if X is None:
        if self.col_sample == 1:
            input_X = self.X_
        else:
            n_features = self.X_.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            # NOTE(review): seeds NumPy's *global* RNG as a side effect
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            # remembered so that cook_test_set selects the same columns
            self.index_col_ = index_col
            input_X = self.X_[:, self.index_col_]

    else:  # X is not None # keep X vs self.X_
        if isinstance(X, pd.DataFrame):
            X = copy.deepcopy(X.values.astype(float))

        if self.col_sample == 1:
            input_X = X
        else:
            n_features = X.shape[1]
            new_n_features = int(np.ceil(n_features * self.col_sample))
            assert (
                new_n_features >= 1
            ), "check class attribute 'col_sample' and the number of covariates provided for X"
            np.random.seed(self.seed)
            index_col = np.random.choice(
                range(n_features), size=new_n_features, replace=False
            )
            self.index_col_ = index_col
            input_X = X[:, self.index_col_]

    # --- 2) optional cluster encoding, then optional hidden layer -----
    if self.n_clusters <= 0:
        # data without any clustering: self.n_clusters is None -----

        if self.n_hidden_features > 0:  # with hidden layer
            # hidden-layer input is scaled with type_scaling[1]
            self.nn_scaler_, scaled_X = mo.scale_covariates(
                input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            # direct link: raw inputs concatenated with hidden features
            Z = (
                mo.cbind(input_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            # final design matrix scaled with type_scaling[0]
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = input_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    else:
        # data with clustering: self.n_clusters is not None ----- # keep

        augmented_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

        if self.n_hidden_features > 0:  # with hidden layer
            self.nn_scaler_, scaled_X = mo.scale_covariates(
                augmented_X,
                choice=self.type_scaling[1],
                scaler=self.nn_scaler_,
            )
            Phi_X = (
                self.create_layer(scaled_X)
                if W is None
                else self.create_layer(scaled_X, W=W)
            )
            Z = (
                mo.cbind(augmented_X, Phi_X, backend=self.backend)
                if self.direct_link is True
                else Phi_X
            )
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )
        else:  # no hidden layer
            Z = augmented_X
            self.scaler_, scaled_Z = mo.scale_covariates(
                Z, choice=self.type_scaling[0], scaler=self.scaler_
            )

    # --- 3) Returning model inputs -----
    if mx.is_factor(y) is False:  # regression
        # center y
        if y is None:
            self.y_mean_, centered_y = mo.center_response(self.y_)
        else:
            self.y_mean_, centered_y = mo.center_response(y)

        # y is subsampled
        if self.row_sample < 1:
            n, p = Z.shape

            self.subsampler_ = (
                SubSampler(
                    y=self.y_, row_sample=self.row_sample, seed=self.seed
                )
                if y is None
                else SubSampler(
                    y=y, row_sample=self.row_sample, seed=self.seed
                )
            )

            self.index_row_ = self.subsampler_.subsample()

            n_row_sample = len(self.index_row_)
            # regression
            # NOTE(review): reshape(n_row_sample) assumes a 1-D response —
            # confirm against callers that pass multi-output y
            return (
                centered_y[self.index_row_].reshape(n_row_sample),
                self.scaler_.transform(
                    Z[self.index_row_, :].reshape(n_row_sample, p)
                ),
            )
        # y is not subsampled
        # regression
        return (centered_y, self.scaler_.transform(Z))

    # classification
    # y is subsampled
    if self.row_sample < 1:
        n, p = Z.shape

        self.subsampler_ = (
            SubSampler(
                y=self.y_, row_sample=self.row_sample, seed=self.seed
            )
            if y is None
            else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed)
        )

        self.index_row_ = self.subsampler_.subsample()

        n_row_sample = len(self.index_row_)
        # classification: labels returned uncentered
        return (
            y[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(
                Z[self.index_row_, :].reshape(n_row_sample, p)
            ),
        )
    # y is not subsampled
    # classification
    return (y, self.scaler_.transform(Z))
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
    """Transform data from test set, with hidden layer.

    Replays the training-time preprocessing on new data using the scalers,
    column indices, hidden-layer weights (``self.W_``) and cluster encoder
    fitted by ``cook_training_set``.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        **kwargs: additional parameters to be passed to self.encode_cluster

    Returns:

        Transformed test set : {array-like}
    """

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    # a single observation is promoted to a 1-row matrix
    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    # NOTE(review): training tests `self.n_clusters <= 0`, this tests `== 0`
    # — confirm n_clusters can never be negative/None here
    if (
        self.n_clusters == 0
    ):  # data without clustering: self.n_clusters is None -----
        if self.n_hidden_features > 0:
            # if hidden layer
            scaled_X = (
                self.nn_scaler_.transform(X)
                if (self.col_sample == 1)
                else self.nn_scaler_.transform(X[:, self.index_col_])
            )
            Phi_X = self.create_layer(scaled_X, self.W_)
            if self.direct_link:
                # NOTE(review): concatenates the nn-scaled X here, whereas
                # cook_training_set concatenates the *raw* input_X before
                # fitting self.scaler_ — confirm this asymmetry is intended
                return self.scaler_.transform(
                    mo.cbind(scaled_X, Phi_X, backend=self.backend)
                )
            # when self.direct_link == False
            return self.scaler_.transform(Phi_X)
        # if no hidden layer # self.n_hidden_features == 0
        # NOTE(review): this branch ignores self.index_col_ even when
        # col_sample < 1 — verify against the training path
        return self.scaler_.transform(X)

    # data with clustering: self.n_clusters > 0 -----
    if self.col_sample == 1:
        predicted_clusters = self.encode_clusters(
            X=X, predict=True, **kwargs
        )
        augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend)
    else:
        predicted_clusters = self.encode_clusters(
            X=X[:, self.index_col_], predict=True, **kwargs
        )
        augmented_X = mo.cbind(
            X[:, self.index_col_], predicted_clusters, backend=self.backend
        )

    if self.n_hidden_features > 0:  # if hidden layer
        scaled_X = self.nn_scaler_.transform(augmented_X)
        Phi_X = self.create_layer(scaled_X, self.W_)
        if self.direct_link:
            # here the *raw* augmented_X is concatenated, matching training
            return self.scaler_.transform(
                mo.cbind(augmented_X, Phi_X, backend=self.backend)
            )
        return self.scaler_.transform(Phi_X)

    # if no hidden layer
    return self.scaler_.transform(augmented_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_cluster
Returns:
Transformed test set : {array-like}
class BaseRegressor(Base, RegressorMixin):
    """Random Vector Functional Link Network regression without shrinkage.

    Fits an ordinary least-squares model on the (scaled, optionally
    cluster-augmented) direct link + hidden-layer features produced by
    ``Base.cook_training_set``.

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for hidden layer nodes: 'sobol', 'hammersley',
            'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or
            not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped
            out of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for type_clust='kmeans' or type_clust='gmm'
            clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default
            is one-hot); if `False`, then labels are used, without one-hot
            encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or
            Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering
            respectively (and when relevant).
            Currently available: standardization ('std') or MinMax scaling
            ('minmax')

        col_sample: float
            percentage of features randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified
            bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform', clustering and
            dropout

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: vector
            regression coefficients

        GCV_: float
            Generalized Cross-Validation error
    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # pure pass-through: all configuration lives on Base
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X, y, **kwargs):
        """Fit BaseRegressor to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object
        """
        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # OLS on the cooked features; also yields the GCV error
        estimates = lmf.beta_Sigma_hat(
            X=scaled_Z, y=centered_y, backend=self.backend
        )
        self.beta_ = estimates["beta_hat"]
        self.GCV_ = estimates["GCV"]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
        """
        single_obs = len(X.shape) == 1

        if single_obs:
            # pad a lone observation with a dummy row so that the test-set
            # preprocessing receives a 2-D matrix; only row 0 is returned
            p = X.shape[0]
            X = mo.rbind(
                X.reshape(1, p),
                np.ones(p).reshape(1, p),
            )

        preds = self.y_mean_ + mo.safe_sparse_dot(
            a=self.cook_test_set(X, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )

        return preds[0] if single_obs else preds
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
122 def fit(self, X, y, **kwargs): 123 """Fit BaseRegressor to training data (X, y) 124 125 Parameters: 126 127 X: {array-like}, shape = [n_samples, n_features] 128 Training vectors, where n_samples is the number 129 of samples and n_features is the number of features 130 131 y: array-like, shape = [n_samples] 132 Target values 133 134 **kwargs: additional parameters to be passed to self.cook_training_set 135 136 Returns: 137 138 self: object 139 """ 140 141 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 142 143 fit_obj = lmf.beta_Sigma_hat( 144 X=scaled_Z, y=centered_y, backend=self.backend 145 ) 146 147 self.beta_ = fit_obj["beta_hat"] 148 149 self.GCV_ = fit_obj["GCV"] 150 151 return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
153 def predict(self, X, **kwargs): 154 """Predict test data X. 155 156 Parameters: 157 158 X: {array-like}, shape = [n_samples, n_features] 159 Training vectors, where n_samples is the number 160 of samples and n_features is the number of features 161 162 **kwargs: additional parameters to be passed to self.cook_test_set 163 164 Returns: 165 166 model predictions: {array-like} 167 """ 168 169 if len(X.shape) == 1: 170 n_features = X.shape[0] 171 new_X = mo.rbind( 172 X.reshape(1, n_features), 173 np.ones(n_features).reshape(1, n_features), 174 ) 175 176 return ( 177 self.y_mean_ 178 + mo.safe_sparse_dot( 179 a=self.cook_test_set(new_X, **kwargs), 180 b=self.beta_, 181 backend=self.backend, 182 ) 183 )[0] 184 185 return self.y_mean_ + mo.safe_sparse_dot( 186 a=self.cook_test_set(X, **kwargs), 187 b=self.beta_, 188 backend=self.backend, 189 )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFLRegressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with one prior

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s: float
            std. dev. of regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, uncertainty around predictions is evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )
        # prior std. dev. of the coefficients
        self.s = s
        # std. dev. of the residuals
        self.sigma = sigma
        # fitted attributes, populated by fit()
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFLRegressor to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # Bayesian ridge with a single isotropic prior (std. dev. self.s);
        # the covariance is only computed when uncertainty is requested
        fit_obj = lmf.beta_Sigma_hat_rvfl(
            X=scaled_Z,
            y=centered_y,
            s=self.s,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        if self.return_std == True:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
            (predictions, predictions std. dev.) when return_std is True

        """

        if len(X.shape) == 1:  # one observation in the test set only
            # pad with a dummy row of ones; only element 0 is returned
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

        # NOTE(review): predict() overwrites the instance attribute set in
        # __init__ — a call with return_std=False disables Sigma_ on the
        # next fit(); confirm this side effect is intended
        self.return_std = return_std

        if self.return_std == False:
            if len(X.shape) == 1:
                return (
                    self.y_mean_
                    + mo.safe_sparse_dot(
                        a=self.cook_test_set(new_X, **kwargs),
                        b=self.beta_,
                        backend=self.backend,
                    )
                )[0]

            return self.y_mean_ + mo.safe_sparse_dot(
                a=self.cook_test_set(X, **kwargs),
                b=self.beta_,
                backend=self.backend,
            )

        else:  # confidence interval required for preds?
            if len(X.shape) == 1:
                Z = self.cook_test_set(new_X, **kwargs)

                pred_obj = lmf.beta_Sigma_hat_rvfl(
                    s=self.s,
                    sigma=self.sigma,
                    X_star=Z,
                    return_cov=True,
                    beta_hat_=self.beta_,
                    Sigma_hat_=self.Sigma_,
                    backend=self.backend,
                )

                return (
                    self.y_mean_ + pred_obj["preds"][0],
                    pred_obj["preds_std"][0],
                )

            Z = self.cook_test_set(X, **kwargs)

            pred_obj = lmf.beta_Sigma_hat_rvfl(
                s=self.s,
                sigma=self.sigma,
                X_star=Z,
                return_cov=True,
                beta_hat_=self.beta_,
                Sigma_hat_=self.Sigma_,
                backend=self.backend,
            )

            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
137 def fit(self, X, y, **kwargs): 138 """Fit BayesianRVFLRegressor to training data (X, y). 139 140 Parameters: 141 142 X: {array-like}, shape = [n_samples, n_features] 143 Training vectors, where n_samples is the number 144 of samples and n_features is the number of features. 145 146 y: array-like, shape = [n_samples] 147 Target values. 148 149 **kwargs: additional parameters to be passed to 150 self.cook_training_set 151 152 Returns: 153 154 self: object 155 156 """ 157 158 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 159 160 fit_obj = lmf.beta_Sigma_hat_rvfl( 161 X=scaled_Z, 162 y=centered_y, 163 s=self.s, 164 sigma=self.sigma, 165 fit_intercept=False, 166 return_cov=self.return_std, 167 backend=self.backend, 168 ) 169 170 self.beta_ = fit_obj["beta_hat"] 171 172 if self.return_std == True: 173 self.Sigma_ = fit_obj["Sigma_hat"] 174 175 self.GCV_ = fit_obj["GCV"] 176 177 return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
179 def predict(self, X, return_std=False, **kwargs): 180 """Predict test data X. 181 182 Parameters: 183 184 X: {array-like}, shape = [n_samples, n_features] 185 Training vectors, where n_samples is the number 186 of samples and n_features is the number of features. 187 188 return_std: {boolean}, standard dev. is returned or not 189 190 **kwargs: additional parameters to be passed to 191 self.cook_test_set 192 193 Returns: 194 195 model predictions: {array-like} 196 197 """ 198 199 if len(X.shape) == 1: # one observation in the test set only 200 n_features = X.shape[0] 201 new_X = mo.rbind( 202 x=X.reshape(1, n_features), 203 y=np.ones(n_features).reshape(1, n_features), 204 backend=self.backend, 205 ) 206 207 self.return_std = return_std 208 209 if self.return_std == False: 210 if len(X.shape) == 1: 211 return ( 212 self.y_mean_ 213 + mo.safe_sparse_dot( 214 a=self.cook_test_set(new_X, **kwargs), 215 b=self.beta_, 216 backend=self.backend, 217 ) 218 )[0] 219 220 return self.y_mean_ + mo.safe_sparse_dot( 221 a=self.cook_test_set(X, **kwargs), 222 b=self.beta_, 223 backend=self.backend, 224 ) 225 226 else: # confidence interval required for preds? 227 if len(X.shape) == 1: 228 Z = self.cook_test_set(new_X, **kwargs) 229 230 pred_obj = lmf.beta_Sigma_hat_rvfl( 231 s=self.s, 232 sigma=self.sigma, 233 X_star=Z, 234 return_cov=True, 235 beta_hat_=self.beta_, 236 Sigma_hat_=self.Sigma_, 237 backend=self.backend, 238 ) 239 240 return ( 241 self.y_mean_ + pred_obj["preds"][0], 242 pred_obj["preds_std"][0], 243 ) 244 245 Z = self.cook_test_set(X, **kwargs) 246 247 pred_obj = lmf.beta_Sigma_hat_rvfl( 248 s=self.s, 249 sigma=self.sigma, 250 X_star=Z, 251 return_cov=True, 252 beta_hat_=self.beta_, 253 Sigma_hat_=self.Sigma_, 254 backend=self.backend, 255 ) 256 257 return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFL2Regressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with two priors

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s1: float
            std. dev. of init. regression parameters in Bayesian Ridge Regression

        s2: float
            std. dev. of augmented regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, uncertainty around predictions is evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=0,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s1=0.1,
        s2=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        # prior std. dev. of the original (direct-link) coefficients
        self.s1 = s1
        # prior std. dev. of the hidden-layer coefficients
        self.s2 = s2
        # std. dev. of the residuals
        self.sigma = sigma
        # fitted attributes, populated by fit()
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFL2Regressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n, p = X.shape
        q = self.n_hidden_features

        if self.direct_link == True:
            # block-diagonal prior: s1 on the first r direct-link columns,
            # s2 on the q hidden-layer columns.
            # NOTE(review): r = p + n_clusters assumes one-hot cluster
            # encoding adds exactly n_clusters columns and that col_sample
            # == 1 — confirm against cook_training_set
            r = p + self.n_clusters

            block11 = (self.s1**2) * np.eye(r)
            block12 = np.zeros((r, q))
            block21 = np.zeros((q, r))
            block22 = (self.s2**2) * np.eye(q)

            Sigma_prior = mo.rbind(
                x=mo.cbind(x=block11, y=block12, backend=self.backend),
                y=mo.cbind(x=block21, y=block22, backend=self.backend),
                backend=self.backend,
            )

        else:
            # hidden-layer features only: isotropic prior with std. dev. s2
            Sigma_prior = (self.s2**2) * np.eye(q)

        fit_obj = lmf.beta_Sigma_hat_rvfl2(
            X=scaled_Z,
            y=centered_y,
            Sigma=Sigma_prior,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        # scikit-learn-style alias for the coefficients
        self.coef_ = self.beta_

        if self.return_std == True:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_std: {boolean}, standard dev. is returned or not

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
            (predictions, predictions std. dev.) when return_std is True

        """

        if len(X.shape) == 1:  # one observation in the test set only
            # pad with a dummy row of ones; only element 0 is returned
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

        # NOTE(review): predict() overwrites the instance attribute set in
        # __init__; a call with return_std=False disables Sigma_ on the
        # next fit() — confirm this side effect is intended
        self.return_std = return_std

        if self.return_std == False:
            if len(X.shape) == 1:
                return (
                    self.y_mean_
                    + mo.safe_sparse_dot(
                        self.cook_test_set(new_X, **kwargs),
                        self.beta_,
                        backend=self.backend,
                    )
                )[0]

            return self.y_mean_ + mo.safe_sparse_dot(
                self.cook_test_set(X, **kwargs),
                self.beta_,
                backend=self.backend,
            )

        else:  # confidence interval required for preds?
            if len(X.shape) == 1:
                Z = self.cook_test_set(new_X, **kwargs)

                pred_obj = lmf.beta_Sigma_hat_rvfl2(
                    X_star=Z,
                    return_cov=self.return_std,
                    beta_hat_=self.beta_,
                    Sigma_hat_=self.Sigma_,
                    backend=self.backend,
                )

                return (
                    self.y_mean_ + pred_obj["preds"][0],
                    pred_obj["preds_std"][0],
                )

            Z = self.cook_test_set(X, **kwargs)

            pred_obj = lmf.beta_Sigma_hat_rvfl2(
                X_star=Z,
                return_cov=self.return_std,
                beta_hat_=self.beta_,
                Sigma_hat_=self.Sigma_,
                backend=self.backend,
            )

            return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFL2Regressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object
    """

    # Center the response and build the scaled/augmented design matrix.
    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n, p = X.shape  # n is unused here; p = number of original features
    q = self.n_hidden_features

    if self.direct_link == True:
        # Original features (plus cluster columns) are part of the design:
        # block-diagonal prior covariance with s1**2 on the direct block
        # and s2**2 on the hidden-features block.
        r = p + self.n_clusters

        block11 = (self.s1**2) * np.eye(r)
        block12 = np.zeros((r, q))
        block21 = np.zeros((q, r))
        block22 = (self.s2**2) * np.eye(q)

        Sigma_prior = mo.rbind(
            x=mo.cbind(x=block11, y=block12, backend=self.backend),
            y=mo.cbind(x=block21, y=block22, backend=self.backend),
            backend=self.backend,
        )

    else:
        # Hidden features only: isotropic prior with variance s2**2.
        Sigma_prior = (self.s2**2) * np.eye(q)

    # Posterior covariance (Sigma_hat) is only computed when return_std is True.
    fit_obj = lmf.beta_Sigma_hat_rvfl2(
        X=scaled_Z,
        y=centered_y,
        Sigma=Sigma_prior,
        sigma=self.sigma,
        fit_intercept=False,  # intercept handled via response centering
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = fit_obj["beta_hat"]

    self.coef_ = self.beta_  # sklearn-style alias

    if self.return_std == True:
        self.Sigma_ = fit_obj["Sigma_hat"]

    self.GCV_ = fit_obj["GCV"]

    return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        return_std: {boolean}, standard dev. is returned or not

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}

    """

    if len(X.shape) == 1:  # one observation in the test set only
        # Pad the single observation with a dummy row of ones so that
        # cook_test_set receives a 2D input; only row 0 is returned below.
        n_features = X.shape[0]
        new_X = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

    # NOTE(review): predict mutates estimator state here (self.return_std),
    # which also affects any subsequent fit (return_cov flag).
    self.return_std = return_std

    if self.return_std == False:
        if len(X.shape) == 1:
            # Single observation: keep only the first (real) row.
            return (
                self.y_mean_
                + mo.safe_sparse_dot(
                    self.cook_test_set(new_X, **kwargs),
                    self.beta_,
                    backend=self.backend,
                )
            )[0]

        # Point predictions: de-center by adding back the training mean.
        return self.y_mean_ + mo.safe_sparse_dot(
            self.cook_test_set(X, **kwargs),
            self.beta_,
            backend=self.backend,
        )

    else:  # confidence interval required for preds?
        # Requires self.Sigma_, i.e. a prior fit with return_std=True.
        if len(X.shape) == 1:
            Z = self.cook_test_set(new_X, **kwargs)

            pred_obj = lmf.beta_Sigma_hat_rvfl2(
                X_star=Z,
                return_cov=self.return_std,
                beta_hat_=self.beta_,
                Sigma_hat_=self.Sigma_,
                backend=self.backend,
            )

            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        Z = self.cook_test_set(X, **kwargs)

        pred_obj = lmf.beta_Sigma_hat_rvfl2(
            X_star=Z,
            return_cov=self.return_std,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

    # (mean predictions, their standard deviations)
        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class ClassicalMTS(MTS):
    """Time series with statistical models (statsmodels), mostly for benchmarks

    Parameters:

        model: type of model: str.
            currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
            Default is 'VAR'

        obj: object
            A time series model from statsmodels

    Attributes:

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

        level_: int
            level of confidence for prediction intervals (default is 95)

    Examples:
        See examples/classical_mts_timeseries.py
    """

    # construct the object -----

    def __init__(self, model="VAR", obj=None):
        # A user-supplied fitted-model class takes precedence over `model`.
        if obj is not None:
            self.model = None
            self.obj = obj
        else:
            self.model = model
            # Map the model name to the statsmodels class (not yet instantiated).
            if self.model == "VAR":
                self.obj = VAR
            elif self.model == "VECM":
                self.obj = VECM
            elif self.model == "ARIMA":
                self.obj = ARIMA
            elif self.model == "ETS":
                self.obj = ExponentialSmoothing
            elif self.model == "Theta":
                self.obj = ThetaModel
            else:
                raise ValueError("model not recognized")
        # Forecast state, populated by fit/predict.
        self.n_series = None
        self.replications = None
        self.mean_ = None
        self.upper_ = None
        self.lower_ = None
        self.output_dates_ = None
        self.alpha_ = None
        self.df_ = None
        self.residuals_ = []
        self.sims_ = None
        self.level_ = None

    def fit(self, X, **kwargs):
        """Fit ClassicalMTS model to training data X, with optional regressors xreg

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training time series, where n_samples is the number
                of samples and n_features is the number of features;
                X must be in increasing order (most recent observations last)

            **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

        Returns:

            self: object
        """

        # Multivariate input has a second dimension; otherwise univariate.
        try:
            self.n_series = X.shape[1]
        except Exception:
            self.n_series = 1

        if (isinstance(X, pd.DataFrame) is False) and isinstance(
            X, pd.Series
        ) is False:  # input data set is a numpy array
            X = pd.DataFrame(X)
            if self.n_series > 1:
                self.series_names = [
                    "series" + str(i) for i in range(X.shape[1])
                ]
            else:
                self.series_names = "series0"

        else:  # input data set is a DataFrame or Series with column names
            X_index = None
            if X.index is not None and len(X.shape) > 1:
                X_index = X.index
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
            # Restore the original index if the conversion dropped it.
            if X_index is not None:
                try:
                    X.index = X_index
                except Exception:
                    pass
            if isinstance(X, pd.DataFrame):
                self.series_names = X.columns.tolist()
            else:
                self.series_names = X.name

        if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
            self.df_ = X
            X = X.values
            self.df_.columns = self.series_names
            self.input_dates = ts.compute_input_dates(self.df_)
        else:
            self.df_ = pd.DataFrame(X, columns=self.series_names)

        if self.model == "Theta":
            # ThetaModel prefers a DataFrame; fall back to raw values.
            try:
                self.obj = self.obj(self.df_, **kwargs).fit()
            except Exception as e:
                self.obj = self.obj(self.df_.values, **kwargs).fit()
            self.residuals_ = None
        else:
            self.obj = self.obj(X, **kwargs).fit()
            try:
                self.residuals_ = self.obj.resid
            except Exception as e:  # Theta
                self.residuals_ = None

        return self

    def predict(self, h=5, level=95, **kwargs):
        """Forecast all the time series, h steps ahead

        Parameters:

            h: {integer}
                Forecasting horizon

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions for horizon = h: {array-like}

        """

        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
        self.level_ = level
        self.lower_ = None  # do not remove (/!\)
        self.upper_ = None  # do not remove (/!\)
        self.sims_ = None  # do not remove (/!\)
        self.level_ = level
        self.alpha_ = 100 - level

        # Two-sided Gaussian multiplier for the requested level.
        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

        # Named tuple for forecast results
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )

        if (
            self.obj is not None
        ):  # try all the special cases of the else section (there's probably a better way)
            # Fallback chain: VAR-style forecast_interval -> VECM-style predict
            # -> ARIMA-style get_forecast -> ETS-style forecast + residual std
            # -> Theta-style prediction_intervals. First one that works wins.
            try:
                (
                    mean_forecast,
                    lower_bound,
                    upper_bound,
                ) = self.obj.forecast_interval(
                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
                )

            except Exception as e:
                try:
                    forecast_result = self.obj.predict(steps=h)
                    mean_forecast = forecast_result
                    (
                        lower_bound,
                        upper_bound,
                    ) = self._compute_confidence_intervals(
                        forecast_result, alpha=self.alpha_ / 100, **kwargs
                    )

                except Exception as e:
                    try:
                        forecast_result = self.obj.get_forecast(steps=h)
                        mean_forecast = forecast_result.predicted_mean
                        lower_bound = forecast_result.conf_int()[:, 0]
                        upper_bound = forecast_result.conf_int()[:, 1]

                    except Exception as e:
                        try:
                            forecast_result = self.obj.forecast(steps=h)
                            residuals = self.obj.resid
                            std_errors = np.std(residuals)
                            mean_forecast = forecast_result
                            lower_bound = (
                                forecast_result - pi_multiplier * std_errors
                            )
                            upper_bound = (
                                forecast_result + pi_multiplier * std_errors
                            )

                        except Exception as e:
                            try:
                                mean_forecast = self.obj.forecast(
                                    steps=h
                                ).values
                                forecast_result = self.obj.prediction_intervals(
                                    steps=h, alpha=self.alpha_ / 100, **kwargs
                                )
                                lower_bound = forecast_result["lower"].values
                                upper_bound = forecast_result["upper"].values
                            except Exception:
                                mean_forecast = self.obj.forecast(steps=h)
                                forecast_result = self.obj.prediction_intervals(
                                    steps=h, alpha=self.alpha_ / 100, **kwargs
                                )
                                lower_bound = forecast_result["lower"]
                                upper_bound = forecast_result["upper"]

        else:
            # NOTE(review): __init__ always sets self.obj (or raises), so this
            # explicit per-model dispatch appears unreachable in practice.
            if self.model == "VAR":
                (
                    mean_forecast,
                    lower_bound,
                    upper_bound,
                ) = self.obj.forecast_interval(
                    self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
                )

            elif self.model == "VECM":
                forecast_result = self.obj.predict(steps=h)
                mean_forecast = forecast_result
                # NOTE(review): _compute_confidence_intervals accepts no
                # **kwargs — extra kwargs here would raise TypeError; confirm.
                lower_bound, upper_bound = self._compute_confidence_intervals(
                    forecast_result, alpha=self.alpha_ / 100, **kwargs
                )

            elif self.model == "ARIMA":
                forecast_result = self.obj.get_forecast(steps=h)
                mean_forecast = forecast_result.predicted_mean
                lower_bound = forecast_result.conf_int()[:, 0]
                upper_bound = forecast_result.conf_int()[:, 1]

            elif self.model == "ETS":
                forecast_result = self.obj.forecast(steps=h)
                residuals = self.obj.resid
                std_errors = np.std(residuals)
                mean_forecast = forecast_result
                lower_bound = forecast_result - pi_multiplier * std_errors
                upper_bound = forecast_result + pi_multiplier * std_errors

            elif self.model == "Theta":
                try:
                    mean_forecast = self.obj.forecast(steps=h).values
                    forecast_result = self.obj.prediction_intervals(
                        steps=h, alpha=self.alpha_ / 100, **kwargs
                    )
                    lower_bound = forecast_result["lower"].values
                    upper_bound = forecast_result["upper"].values
                except Exception:
                    mean_forecast = self.obj.forecast(steps=h)
                    forecast_result = self.obj.prediction_intervals(
                        steps=h, alpha=self.alpha_ / 100, **kwargs
                    )
                    lower_bound = forecast_result["lower"]
                    upper_bound = forecast_result["upper"]

            else:
                raise ValueError("model not recognized")

        # Multivariate results become DataFrames; univariate falls back to Series.
        try:
            self.mean_ = pd.DataFrame(
                mean_forecast,
                columns=self.series_names,
                index=self.output_dates_,
            )
            self.lower_ = pd.DataFrame(
                lower_bound, columns=self.series_names, index=self.output_dates_
            )
            self.upper_ = pd.DataFrame(
                upper_bound, columns=self.series_names, index=self.output_dates_
            )
        except Exception:
            self.mean_ = pd.Series(
                mean_forecast, name=self.series_names, index=self.output_dates_
            )
            self.lower_ = pd.Series(
                lower_bound, name=self.series_names, index=self.output_dates_
            )
            self.upper_ = pd.Series(
                upper_bound, name=self.series_names, index=self.output_dates_
            )

        return DescribeResult(
            mean=self.mean_, lower=self.lower_, upper=self.upper_
        )

    def _compute_confidence_intervals(self, forecast_result, alpha):
        """
        Compute confidence intervals for VECM forecasts.
        Uses the covariance of residuals to approximate the confidence intervals.
        """
        residuals = self.obj.resid
        cov_matrix = np.cov(residuals.T)  # Covariance matrix of residuals
        std_errors = np.sqrt(np.diag(cov_matrix))  # Standard errors

        z_value = norm.ppf(1 - alpha / 2)  # Z-score for the given alpha level
        lower_bound = forecast_result - z_value * std_errors
        upper_bound = forecast_result + z_value * std_errors

        return lower_bound, upper_bound

    def score(self, X, training_index, testing_index, scoring=None, **kwargs):
        """Train on training_index, score on testing_index."""

        assert (
            bool(set(training_index).intersection(set(testing_index))) == False
        ), "Non-overlapping 'training_index' and 'testing_index' required"

        # Dimensions
        try:
            # multivariate time series
            n, p = X.shape
        except:
            # univariate time series
            n = X.shape[0]
            p = 1

        # Training and testing sets
        if p > 1:
            X_train = X[training_index, :]
            X_test = X[testing_index, :]
        else:
            X_train = X[training_index]
            X_test = X[testing_index]

        # Horizon
        h = len(testing_index)
        assert (
            len(training_index) + h
        ) <= n, "Please check lengths of training and testing windows"

        # Fit and predict
        self.fit(X_train, **kwargs)
        preds = self.predict(h=h, **kwargs)

        if scoring is None:
            scoring = "neg_root_mean_squared_error"

        # check inputs
        assert scoring in (
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_root_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
            "r2",
        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
            'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
            'neg_median_absolute_error', 'r2')"

        scoring_options = {
            "explained_variance": skm2.explained_variance_score,
            "neg_mean_absolute_error": skm2.mean_absolute_error,
            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
                np.mean((x - y) ** 2)
            ),
            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
            "neg_median_absolute_error": skm2.median_absolute_error,
            "r2": skm2.r2_score,
        }

        # if p > 1:
        #     return tuple(
        #         [
        #             scoring_options[scoring](
        #                 X_test[:, i], preds[:, i]#, **kwargs
        #             )
        #             for i in range(p)
        #         ]
        #     )
        # else:
        return scoring_options[scoring](X_test, preds)

    def plot(self, series=None, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

            series: {integer} or {string}
                series index or name

        """

        # Forecasts must exist before plotting.
        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with predict)"

        if series is None:
            assert (
                self.n_series == 1
            ), "please specify series index or name (n_series > 1)"
            series = 0

        # Resolve a series name or index to a column position.
        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.df_.columns.get_loc(series)
        else:
            assert isinstance(series, int) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        if isinstance(self.df_, pd.DataFrame):
            y_all = list(self.df_.iloc[:, series_idx]) + list(
                self.mean_.iloc[:, series_idx]
            )
            y_test = list(self.mean_.iloc[:, series_idx])
        else:
            y_all = list(self.df_.values) + list(self.mean_.values)
            y_test = list(self.mean_.values)
        n_points_all = len(y_all)
        n_points_train = self.df_.shape[0]

        if type_axis == "numeric":
            x_all = [i for i in range(n_points_all)]
            x_test = [i for i in range(n_points_train, n_points_all)]

        if type_axis == "dates":  # use dates
            x_all = np.concatenate(
                (self.input_dates.values, self.output_dates_.values), axis=None
            )
            x_test = self.output_dates_.values

        if type_plot == "pi":
            fig, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            # DataFrame bounds use positional columns; Series bounds use .values.
            try:
                ax.fill_between(
                    x_test,
                    self.lower_.iloc[:, series_idx],
                    self.upper_.iloc[:, series_idx],
                    alpha=0.2,
                    color="orange",
                )
            except Exception:
                ax.fill_between(
                    x_test,
                    self.lower_.values,
                    self.upper_.values,
                    alpha=0.2,
                    color="orange",
                )
            if self.replications is None:
                if self.n_series > 1:
                    plt.title(
                        f"prediction intervals for {series}",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                else:
                    plt.title(
                        f"prediction intervals for input time series",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                plt.show()
            else:  # self.replications is not None
                if self.n_series > 1:
                    plt.title(
                        f"prediction intervals for {self.replications} simulations of {series}",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                else:
                    plt.title(
                        f"prediction intervals for {self.replications} simulations of input time series",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                plt.show()

        if type_plot == "spaghetti":
            palette = plt.get_cmap("Set1")
            sims_ix = getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    color=palette(col_ix),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            if self.n_series > 1:
                plt.title(
                    f"{self.replications} simulations of {series}",
                    loc="left",
                    fontsize=12,
                    fontweight=0,
                    color="black",
                )
            else:
                plt.title(
                    f"{self.replications} simulations of input time series",
                    loc="left",
                    fontsize=12,
                    fontweight=0,
                    color="black",
                )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()

    def cross_val_score(
        self,
        X,
        scoring="root_mean_squared_error",
        n_jobs=None,
        verbose=0,
        xreg=None,
        initial_window=5,
        horizon=3,
        fixed_window=False,
        show_progress=True,
        level=95,
        **kwargs,
    ):
        """Evaluate a score by time series cross-validation.

        Parameters:

            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                The data to fit.

            scoring: str or a function
                A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
                'mean_absolute_error', 'mean_error', 'mean_percentage_error',
                'mean_absolute_percentage_error', 'winkler_score', 'coverage')
                Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`

            n_jobs: int, default=None
                Number of jobs to run in parallel.

            verbose: int, default=0
                The verbosity level.

            xreg: array-like, optional (default=None)
                Additional (external) regressors to be passed to `fit`
                xreg must be in 'increasing' order (most recent observations last)

            initial_window: int
                initial number of consecutive values in each training set sample

            horizon: int
                number of consecutive values in test set sample

            fixed_window: boolean
                if False, all training samples start at index 0, and the training
                window's size is increasing.
                if True, the training window's size is fixed, and the window is
                rolling forward

            show_progress: boolean
                if True, a progress bar is printed

            **kwargs: dict
                additional parameters to be passed to `fit` and `predict`

        Returns:

            A tuple: descriptive statistics or errors and raw errors

        """
        tscv = TimeSeriesSplit()

        tscv_obj = tscv.split(
            X,
            initial_window=initial_window,
            horizon=horizon,
            fixed_window=fixed_window,
        )

        if isinstance(scoring, str):
            assert scoring in (
                "root_mean_squared_error",
                "mean_squared_error",
                "mean_error",
                "mean_absolute_error",
                "mean_percentage_error",
                "mean_absolute_percentage_error",
                "winkler_score",
                "coverage",
            ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"

            def err_func(X_test, X_pred, scoring):
                # Probabilistic forecasts carry intervals; point forecasts don't.
                if (self.replications is not None) or (
                    self.type_pi == "gaussian"
                ):  # probabilistic
                    if scoring == "winkler_score":
                        return winkler_score(X_pred, X_test, level=level)
                    elif scoring == "coverage":
                        return coverage(X_pred, X_test, level=level)
                    else:
                        return mean_errors(
                            pred=X_pred.mean, actual=X_test, scoring=scoring
                        )
                else:  # not probabilistic
                    return mean_errors(
                        pred=X_pred, actual=X_test, scoring=scoring
                    )

        else:  # isinstance(scoring, str) = False
            err_func = scoring

        errors = []

        train_indices = []

        test_indices = []

        # Materialize the splits so the progress bar knows the total.
        for train_index, test_index in tscv_obj:
            train_indices.append(train_index)
            test_indices.append(test_index)

        if show_progress is True:
            iterator = tqdm(
                zip(train_indices, test_indices), total=len(train_indices)
            )
        else:
            iterator = zip(train_indices, test_indices)

        for train_index, test_index in iterator:
            if verbose == 1:
                print(f"TRAIN: {train_index}")
                print(f"TEST: {test_index}")

            if isinstance(X, pd.DataFrame):
                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
                X_test = X.iloc[test_index, :]
            else:
                self.fit(X[train_index, :], xreg=xreg, **kwargs)
                X_test = X[test_index, :]
            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)

            errors.append(err_func(X_test, X_pred, scoring))

        res = np.asarray(errors)

        return res, describe(res)
Time series with statistical models (statsmodels), mostly for benchmarks
Parameters:
model: type of model: str.
currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Default is 'VAR'
obj: object
A time series model from statsmodels
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
def fit(self, X, **kwargs):
    """Fit ClassicalMTS model to training data X, with optional regressors xreg

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

    Returns:

        self: object
    """

    # Multivariate input exposes a second dimension; otherwise univariate.
    try:
        self.n_series = X.shape[1]
    except Exception:
        self.n_series = 1

    if (isinstance(X, pd.DataFrame) is False) and isinstance(
        X, pd.Series
    ) is False:  # input data set is a numpy array
        X = pd.DataFrame(X)
        # Synthesize column names for the unnamed array input.
        if self.n_series > 1:
            self.series_names = [
                "series" + str(i) for i in range(X.shape[1])
            ]
        else:
            self.series_names = "series0"

    else:  # input data set is a DataFrame or Series with column names
        X_index = None
        if X.index is not None and len(X.shape) > 1:
            X_index = X.index
        X = copy.deepcopy(mo.convert_df_to_numeric(X))
        # Restore the original index if numeric conversion replaced it.
        if X_index is not None:
            try:
                X.index = X_index
            except Exception:
                pass
        if isinstance(X, pd.DataFrame):
            self.series_names = X.columns.tolist()
        else:
            self.series_names = X.name

    if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series):
        self.df_ = X
        X = X.values
        self.df_.columns = self.series_names
        self.input_dates = ts.compute_input_dates(self.df_)
    else:
        self.df_ = pd.DataFrame(X, columns=self.series_names)

    if self.model == "Theta":
        # ThetaModel prefers a DataFrame input; fall back to raw values.
        try:
            self.obj = self.obj(self.df_, **kwargs).fit()
        except Exception as e:
            self.obj = self.obj(self.df_.values, **kwargs).fit()
        self.residuals_ = None
    else:
        self.obj = self.obj(X, **kwargs).fit()
        try:
            self.residuals_ = self.obj.resid
        except Exception as e:  # Theta
            self.residuals_ = None

    return self
Fit ClassicalMTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
def predict(self, h=5, level=95, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: {integer}
            Forecasting horizon

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions for horizon = h: {array-like}

    """

    self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)
    self.level_ = level
    self.lower_ = None  # do not remove (/!\)
    self.upper_ = None  # do not remove (/!\)
    self.sims_ = None  # do not remove (/!\)
    self.level_ = level
    self.alpha_ = 100 - level

    # Two-sided Gaussian multiplier for the requested confidence level.
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    # Named tuple for forecast results
    DescribeResult = namedtuple(
        "DescribeResult", ("mean", "lower", "upper")
    )

    if (
        self.obj is not None
    ):  # try all the special cases of the else section (there's probably a better way)
        # Fallback chain over statsmodels result APIs: forecast_interval (VAR),
        # predict + residual-based CIs (VECM), get_forecast (ARIMA),
        # forecast + residual std (ETS), prediction_intervals (Theta).
        try:
            (
                mean_forecast,
                lower_bound,
                upper_bound,
            ) = self.obj.forecast_interval(
                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
            )

        except Exception as e:
            try:
                forecast_result = self.obj.predict(steps=h)
                mean_forecast = forecast_result
                (
                    lower_bound,
                    upper_bound,
                ) = self._compute_confidence_intervals(
                    forecast_result, alpha=self.alpha_ / 100, **kwargs
                )

            except Exception as e:
                try:
                    forecast_result = self.obj.get_forecast(steps=h)
                    mean_forecast = forecast_result.predicted_mean
                    lower_bound = forecast_result.conf_int()[:, 0]
                    upper_bound = forecast_result.conf_int()[:, 1]

                except Exception as e:
                    try:
                        forecast_result = self.obj.forecast(steps=h)
                        residuals = self.obj.resid
                        std_errors = np.std(residuals)
                        mean_forecast = forecast_result
                        lower_bound = (
                            forecast_result - pi_multiplier * std_errors
                        )
                        upper_bound = (
                            forecast_result + pi_multiplier * std_errors
                        )

                    except Exception as e:
                        try:
                            mean_forecast = self.obj.forecast(
                                steps=h
                            ).values
                            forecast_result = self.obj.prediction_intervals(
                                steps=h, alpha=self.alpha_ / 100, **kwargs
                            )
                            lower_bound = forecast_result["lower"].values
                            upper_bound = forecast_result["upper"].values
                        except Exception:
                            mean_forecast = self.obj.forecast(steps=h)
                            forecast_result = self.obj.prediction_intervals(
                                steps=h, alpha=self.alpha_ / 100, **kwargs
                            )
                            lower_bound = forecast_result["lower"]
                            upper_bound = forecast_result["upper"]

    else:
        # NOTE(review): __init__ always sets self.obj (or raises), so this
        # per-model dispatch looks unreachable in practice — confirm.
        if self.model == "VAR":
            (
                mean_forecast,
                lower_bound,
                upper_bound,
            ) = self.obj.forecast_interval(
                self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
            )

        elif self.model == "VECM":
            forecast_result = self.obj.predict(steps=h)
            mean_forecast = forecast_result
            # NOTE(review): _compute_confidence_intervals accepts no **kwargs;
            # extra kwargs here would raise TypeError — confirm with callers.
            lower_bound, upper_bound = self._compute_confidence_intervals(
                forecast_result, alpha=self.alpha_ / 100, **kwargs
            )

        elif self.model == "ARIMA":
            forecast_result = self.obj.get_forecast(steps=h)
            mean_forecast = forecast_result.predicted_mean
            lower_bound = forecast_result.conf_int()[:, 0]
            upper_bound = forecast_result.conf_int()[:, 1]

        elif self.model == "ETS":
            forecast_result = self.obj.forecast(steps=h)
            residuals = self.obj.resid
            std_errors = np.std(residuals)
            mean_forecast = forecast_result
            lower_bound = forecast_result - pi_multiplier * std_errors
            upper_bound = forecast_result + pi_multiplier * std_errors

        elif self.model == "Theta":
            try:
                mean_forecast = self.obj.forecast(steps=h).values
                forecast_result = self.obj.prediction_intervals(
                    steps=h, alpha=self.alpha_ / 100, **kwargs
                )
                lower_bound = forecast_result["lower"].values
                upper_bound = forecast_result["upper"].values
            except Exception:
                mean_forecast = self.obj.forecast(steps=h)
                forecast_result = self.obj.prediction_intervals(
                    steps=h, alpha=self.alpha_ / 100, **kwargs
                )
                lower_bound = forecast_result["lower"]
                upper_bound = forecast_result["upper"]

        else:
            raise ValueError("model not recognized")

    # Multivariate forecasts become DataFrames; univariate falls back to Series.
    try:
        self.mean_ = pd.DataFrame(
            mean_forecast,
            columns=self.series_names,
            index=self.output_dates_,
        )
        self.lower_ = pd.DataFrame(
            lower_bound, columns=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.DataFrame(
            upper_bound, columns=self.series_names, index=self.output_dates_
        )
    except Exception:
        self.mean_ = pd.Series(
            mean_forecast, name=self.series_names, index=self.output_dates_
        )
        self.lower_ = pd.Series(
            lower_bound, name=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.Series(
            upper_bound, name=self.series_names, index=self.output_dates_
        )

    return DescribeResult(
        mean=self.mean_, lower=self.lower_, upper=self.upper_
    )
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions for horizon = h: {array-like}
354 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 355 """Train on training_index, score on testing_index.""" 356 357 assert ( 358 bool(set(training_index).intersection(set(testing_index))) == False 359 ), "Non-overlapping 'training_index' and 'testing_index' required" 360 361 # Dimensions 362 try: 363 # multivariate time series 364 n, p = X.shape 365 except: 366 # univariate time series 367 n = X.shape[0] 368 p = 1 369 370 # Training and testing sets 371 if p > 1: 372 X_train = X[training_index, :] 373 X_test = X[testing_index, :] 374 else: 375 X_train = X[training_index] 376 X_test = X[testing_index] 377 378 # Horizon 379 h = len(testing_index) 380 assert ( 381 len(training_index) + h 382 ) <= n, "Please check lengths of training and testing windows" 383 384 # Fit and predict 385 self.fit(X_train, **kwargs) 386 preds = self.predict(h=h, **kwargs) 387 388 if scoring is None: 389 scoring = "neg_root_mean_squared_error" 390 391 # check inputs 392 assert scoring in ( 393 "explained_variance", 394 "neg_mean_absolute_error", 395 "neg_mean_squared_error", 396 "neg_root_mean_squared_error", 397 "neg_mean_squared_log_error", 398 "neg_median_absolute_error", 399 "r2", 400 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 401 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 402 'neg_median_absolute_error', 'r2')" 403 404 scoring_options = { 405 "explained_variance": skm2.explained_variance_score, 406 "neg_mean_absolute_error": skm2.mean_absolute_error, 407 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 408 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 409 np.mean((x - y) ** 2) 410 ), 411 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 412 "neg_median_absolute_error": skm2.median_absolute_error, 413 "r2": skm2.r2_score, 414 } 415 416 # if p > 1: 417 # return tuple( 418 # [ 419 # scoring_options[scoring]( 420 # X_test[:, i], preds[:, i]#, **kwargs 
421 # ) 422 # for i in range(p) 423 # ] 424 # ) 425 # else: 426 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
class CustomClassifier(Custom, ClassifierMixin):
    """Custom Classification model

    Attributes:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        cv_calibration: int, cross-validation generator, or iterable, default=2
            Determines the cross-validation splitting strategy. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        calibration_method: str
            {'sigmoid', 'isotonic'}, default='sigmoid'
            The method to use for calibration. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly

    ```python
    import nnetsauce as ns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_digits
    from time import time

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=123)

    # layer 1 (base layer) ----
    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

    start = time()

    layer1_regr.fit(X_train, y_train)

    # Accuracy in layer 1
    print(layer1_regr.score(X_test, y_test))

    # layer 2 using layer 1 ----
    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                            direct_link=True, bias=True,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer2_regr.fit(X_train, y_train)

    # Accuracy in layer 2
    print(layer2_regr.score(X_test, y_test))

    # layer 3 using layer 2 ----
    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                            direct_link=True, bias=True, dropout=0.7,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer3_regr.fit(X_train, y_train)

    # Accuracy in layer 3
    print(layer3_regr.score(X_test, y_test))

    print(f"Elapsed {time() - start}")
    ```

    """

    # construct the object -----
    # scikit-learn estimator tag; previously this attribute was also
    # shadowed by a redundant @property of the same name (removed).
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method

    def __sklearn_clone__(self):
        """Create a clone of the estimator.

        This is required for scikit-learn's calibration system to work properly.
        """
        # Create a new instance with the same parameters
        clone = CustomClassifier(
            obj=self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=self.cv_calibration,
            calibration_method=self.calibration_method,
            seed=self.seed,
            backend=self.backend,
        )
        return clone

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        # A single observation arrives 1-D; reshape it to a one-row matrix.
        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Wrap in CalibratedClassifierCV if needed. Guard against
        # re-wrapping an already-calibrated estimator when fit() is
        # called more than once on the same object.
        if self.cv_calibration is not None and not isinstance(
            self.obj, CalibratedClassifierCV
        ):
            self.obj = CalibratedClassifierCV(
                self.obj, cv=self.cv_calibration, method=self.calibration_method
            )

        # if sample_weights, else: (must use self.row_index)
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                output_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
            return self

        # if sample_weight is None:
        self.obj.fit(scaled_Z, output_y, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Expose linear-model attributes of the wrapped estimator, if any.
        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, sample_weight=None, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        Raises:

            NotImplementedError: if the base estimator's partial_fit does
                not support sample weights.
        """

        # A single observation arrives 1-D; reshape and wrap its label.
        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y], dtype=int)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn

        # if sample_weights, else: (must use self.row_index)
        if sample_weight is not None:
            try:
                self.obj.partial_fit(
                    scaled_Z,
                    output_y,
                    sample_weight=sample_weight[self.index_row_].ravel(),
                )
            except Exception as exc:
                # BUG FIX: previously a bare `except:` evaluated the
                # NotImplementedError class as a no-op expression and
                # silently swallowed every error; actually raise it.
                raise NotImplementedError(
                    "base estimator's partial_fit does not support "
                    "sample weights"
                ) from exc

            return self

        # if sample_weight is None:
        self.obj.partial_fit(scaled_Z, output_y)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}
        """

        if len(X.shape) == 1:
            # single observation: pad with a row of ones so that the
            # preprocessing pipeline receives a 2-row matrix, then keep
            # only the first prediction
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
            )[0]

        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
        """

        if len(X.shape) == 1:
            # single observation: same padding trick as in `predict`
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            return (
                self.obj.predict_proba(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]
        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
                Decision function of the input samples. The order of outputs is the same
                as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            # single observation: same padding trick as in `predict`
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not a supported metric name
                (previously an unknown name silently returned None).
        """

        if scoring is None:
            scoring = "accuracy"

        # metrics computed from hard class predictions
        label_metrics = {
            "accuracy": skm2.accuracy_score,
            "f1": skm2.f1_score,
            "precision": skm2.precision_score,
            "recall": skm2.recall_score,
            "roc_auc": skm2.roc_auc_score,
            "balanced_accuracy": skm2.balanced_accuracy_score,
            "average_precision": skm2.average_precision_score,
        }
        if scoring in label_metrics:
            return label_metrics[scoring](y, self.predict(X))

        # metrics computed from predicted probabilities
        if scoring == "log_loss":
            return skm2.log_loss(y, self.predict_proba(X))

        if scoring == "neg_brier_score":
            return -skm2.brier_score_loss(y, self.predict_proba(X))

        if scoring == "neg_log_loss":
            return -skm2.log_loss(y, self.predict_proba(X))

        raise ValueError(f"unknown 'scoring': {scoring}")
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{‘sigmoid’, ‘isotonic’}, default=’sigmoid’
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        # A single observation arrives 1-D; reshape it to a one-row matrix.
        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Wrap in CalibratedClassifierCV if needed
        # NOTE(review): calling fit() twice re-wraps the already-wrapped
        # estimator in a second CalibratedClassifierCV — confirm whether
        # repeated fits are expected on the same instance.
        if self.cv_calibration is not None:
            self.obj = CalibratedClassifierCV(
                self.obj, cv=self.cv_calibration, method=self.calibration_method
            )

        # if sample_weights, else: (must use self.row_index)
        # Weights are re-indexed by the bootstrap rows selected during
        # preprocessing; this branch returns early, so coef_/intercept_
        # below are only mirrored in the unweighted path.
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                output_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
            return self

        # if sample_weight is None:
        self.obj.fit(scaled_Z, output_y, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # Mirror linear-model attributes of the wrapped estimator, if present.
        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
336 def predict(self, X, **kwargs): 337 """Predict test data X. 338 339 Parameters: 340 341 X: {array-like}, shape = [n_samples, n_features] 342 Training vectors, where n_samples is the number 343 of samples and n_features is the number of features. 344 345 **kwargs: additional parameters to be passed to 346 self.cook_test_set 347 348 Returns: 349 350 model predictions: {array-like} 351 """ 352 353 if len(X.shape) == 1: 354 n_features = X.shape[0] 355 new_X = mo.rbind( 356 X.reshape(1, n_features), 357 np.ones(n_features).reshape(1, n_features), 358 ) 359 360 return ( 361 self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs) 362 )[0] 363 364 return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
366 def predict_proba(self, X, **kwargs): 367 """Predict probabilities for test data X. 368 369 Args: 370 371 X: {array-like}, shape = [n_samples, n_features] 372 Training vectors, where n_samples is the number 373 of samples and n_features is the number of features. 374 375 **kwargs: additional parameters to be passed to 376 self.cook_test_set 377 378 Returns: 379 380 probability estimates for test data: {array-like} 381 """ 382 383 if len(X.shape) == 1: 384 n_features = X.shape[0] 385 new_X = mo.rbind( 386 X.reshape(1, n_features), 387 np.ones(n_features).reshape(1, n_features), 388 ) 389 return ( 390 self.obj.predict_proba( 391 self.cook_test_set(new_X, **kwargs), **kwargs 392 ) 393 )[0] 394 return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
435 def score(self, X, y, scoring=None): 436 """Scoring function for classification. 437 438 Args: 439 440 X: {array-like}, shape = [n_samples, n_features] 441 Training vectors, where n_samples is the number 442 of samples and n_features is the number of features. 443 444 y: array-like, shape = [n_samples] 445 Target values. 446 447 scoring: str 448 scoring method (default is accuracy) 449 450 Returns: 451 452 score: float 453 """ 454 455 if scoring is None: 456 scoring = "accuracy" 457 458 if scoring == "accuracy": 459 return skm2.accuracy_score(y, self.predict(X)) 460 461 if scoring == "f1": 462 return skm2.f1_score(y, self.predict(X)) 463 464 if scoring == "precision": 465 return skm2.precision_score(y, self.predict(X)) 466 467 if scoring == "recall": 468 return skm2.recall_score(y, self.predict(X)) 469 470 if scoring == "roc_auc": 471 return skm2.roc_auc_score(y, self.predict(X)) 472 473 if scoring == "log_loss": 474 return skm2.log_loss(y, self.predict_proba(X)) 475 476 if scoring == "balanced_accuracy": 477 return skm2.balanced_accuracy_score(y, self.predict(X)) 478 479 if scoring == "average_precision": 480 return skm2.average_precision_score(y, self.predict(X)) 481 482 if scoring == "neg_brier_score": 483 return -skm2.brier_score_loss(y, self.predict_proba(X)) 484 485 if scoring == "neg_log_loss": 486 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class CustomRegressor(Custom, RegressorMixin):
    """Custom Regression model

    This class is used to 'augment' any regression model with transformed features.

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        type_pi: str.
            type of prediction interval; currently `None` (split or local
            conformal without simulation), "kde" or "bootstrap" (simulated split
            conformal).

        replications: int.
            number of replications (if needed) for predictive simulation.
            Used only in `self.predict`, for `self.kernel` in ('gaussian',
            'tophat') and `self.type_pi = 'kde'`. Default is `None`.

        kernel: str.
            the kernel to use for kernel density estimation (used for predictive
            simulation in `self.predict`, with `method='splitconformal'` and
            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

        type_split: str.
            Type of splitting for conformal prediction. None (default), or
            "random" (random split of data) or "sequential" (sequential split of data)

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        level: float
            confidence level for prediction intervals

        pi_method: str
            method for prediction intervals: 'splitconformal' or 'localconformal'

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        type_fit: str
            'regression'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        type_pi=None,
        replications=None,
        kernel=None,
        type_split=None,
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method=None,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.type_pi = type_pi
        self.replications = replications
        self.kernel = kernel
        self.type_split = type_split
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None
        self.intercept_ = None
        self.X_ = None
        self.y_ = None
        self.aic_ = None
        self.aicc_ = None
        self.bic_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # NOTE(review): repeated fits re-wrap self.obj in another
        # PredictionInterval when level is set — confirm if intended.
        if self.level is not None:
            self.obj = PredictionInterval(
                obj=self.obj, method=self.pi_method, level=self.level
            )

        # if sample_weights, else: (must use self.row_index)
        # This branch returns early: X_/y_, SSE and the information
        # criteria below are only computed in the unweighted path.
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                centered_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )

            return self

        self.obj.fit(scaled_Z, centered_y, **kwargs)

        # Keep training data for conformal prediction (see `predict`).
        self.X_ = X

        self.y_ = y

        # Compute SSE on the (centered) training response
        centered_y_pred = self.obj.predict(scaled_Z)
        self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

        # Get number of parameters
        n_params = (
            self.n_hidden_features + X.shape[1]
        )  # hidden features + original features
        if self.n_clusters > 0:
            n_params += self.n_clusters  # add clusters if used

        # Compute information criteria (Gaussian log-likelihood up to a
        # constant: n * log(SSE / n))
        n_samples = X.shape[0]
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params
        # BUG FIX: aicc_ was declared in __init__ but never computed;
        # small-sample corrected AIC = AIC + 2k(k+1)/(n-k-1)
        if n_samples - n_params - 1 > 0:
            self.aicc_ = self.aic_ + (2 * n_params * (n_params + 1)) / (
                n_samples - n_params - 1
            )

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        # A single observation arrives 1-D; reshape and wrap its target.
        if len(X.shape) == 1:
            if isinstance(X, pd.DataFrame):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.columns)
            else:
                X = X.reshape(1, -1)
            y = np.array([y])

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X

        self.y_ = y

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            level: int
                Level of confidence (default = 95)

            method: str
                'splitconformal', 'localconformal'
                prediction (if you specify `return_pi = True`)

            **kwargs: additional parameters
                `return_pi = True` for conformal prediction,
                with `method` in ('splitconformal', 'localconformal')
                or `return_std = True` for `self.obj` in
                (`sklearn.linear_model.BayesianRidge`,
                `sklearn.linear_model.ARDRegressor`,
                `sklearn.gaussian_process.GaussianProcessRegressor`)`

        Returns:

            model predictions:
                an array if uncertainty quantification is not requested,
                or a tuple if with prediction intervals and simulations
                if `return_std = True` (mean, standard deviation,
                lower and upper prediction interval) or `return_pi = True`
                ()

        """

        if "return_std" in kwargs:
            # Gaussian prediction interval: mean +/- z * std
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            if len(X.shape) == 1:
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )

                # BUG FIX: predict(..., return_std=True) returns the pair
                # (means, stds). The previous code indexed `[0]` first,
                # which unpacked the first two *means* into (mean_, std_).
                means_, stds_ = self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), return_std=True
                )
                mean_, std_ = means_[0], stds_[0]

                preds = self.y_mean_ + mean_
                lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
                upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

                return DescribeResults(preds, std_, lower, upper)

            # len(X.shape) > 1
            mean_, std_ = self.obj.predict(
                self.cook_test_set(X, **kwargs), return_std=True
            )

            preds = self.y_mean_ + mean_
            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            self.pi = PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            # training data stored in fit(); reshape a single stored row
            if len(self.X_.shape) == 1:
                if isinstance(X, pd.DataFrame):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.columns
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            self.pi.fit(self.X_, self.y_)
            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
            preds = self.pi.predict(X, return_pi=True)
            return preds

        # "return_std" not in kwargs
        if len(X.shape) == 1:
            # single observation: pad with a row of ones (keeps scaling
            # well-defined), predict, return the first result
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        # len(X.shape) > 1
        return self.y_mean_ + self.obj.predict(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (a scikit-learn scorer name; default is RMSE)

        Returns:

            score: float

        """

        if scoring is None:
            # default: root mean squared error
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # When a confidence level is requested, wrap the base learner in a
    # conformal prediction-interval estimator.
    if self.level is not None:
        self.obj = PredictionInterval(
            obj=self.obj, method=self.pi_method, level=self.level
        )

    if sample_weight is not None:
        # NOTE(review): weights are subset by self.index_row_ —
        # presumably the row subsample chosen during cook_training_set;
        # TODO confirm.
        self.obj.fit(
            scaled_Z,
            centered_y,
            sample_weight=sample_weight[self.index_row_].ravel(),
            **kwargs
        )
        # Fix: also keep the training data in this branch, so that
        # predict(..., return_pi=True) — which refits a conformal
        # wrapper on self.X_ / self.y_ — works after a weighted fit.
        self.X_ = X
        self.y_ = y
        return self

    self.obj.fit(scaled_Z, centered_y, **kwargs)

    # Keep raw training data for conformal prediction in predict().
    self.X_ = X

    self.y_ = y

    # Residual sum of squares on the (centered) training response.
    centered_y_pred = self.obj.predict(scaled_Z)
    self.sse_ = np.sum((centered_y - centered_y_pred) ** 2)

    # Crude parameter count: hidden features + original features
    # (+ clusters when clustering is enabled).
    n_params = self.n_hidden_features + X.shape[1]
    if self.n_clusters > 0:
        n_params += self.n_clusters

    # Gaussian-likelihood information criteria.
    n_samples = X.shape[0]
    temp = n_samples * np.log(self.sse_ / n_samples)
    self.aic_ = temp + 2 * n_params
    self.bic_ = temp + np.log(n_samples) * n_params

    # Mirror fitted attributes of the wrapped estimator when exposed.
    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict on, where n_samples is the number
            of samples and n_features is the number of features.

        level: int
            Level of confidence (default = 95)

        method: str
            'splitconformal' or 'localconformal' conformal
            prediction (used if you specify `return_pi = True`)

        **kwargs: additional parameters
            `return_pi = True` for conformal prediction,
            with `method` in ('splitconformal', 'localconformal'),
            or `return_std = True` for `self.obj` in
            (`sklearn.linear_model.BayesianRidge`,
            `sklearn.linear_model.ARDRegression`,
            `sklearn.gaussian_process.GaussianProcessRegressor`)

    Returns:

        model predictions:
            an array if uncertainty quantification is not requested,
            a named tuple (mean, std, lower, upper) if
            `return_std = True`, or conformal prediction intervals
            if `return_pi = True`

    """
    if "return_std" in kwargs:
        # Gaussian prediction band at the requested confidence level.
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)

        if len(X.shape) == 1:
            # Single observation: pad with a dummy row of ones so the
            # transformation pipeline receives 2D input; keep row 0 only.
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            mean_, std_ = self.obj.predict(
                self.cook_test_set(new_X, **kwargs), return_std=True
            )[0]

            # Add back the training-response mean removed at fit time.
            preds = self.y_mean_ + mean_
            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        # len(X.shape) > 1
        mean_, std_ = self.obj.predict(
            self.cook_test_set(X, **kwargs), return_std=True
        )

        preds = self.y_mean_ + mean_
        lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
        upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        return DescribeResults(preds, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        # Promote stored 1D training data to 2D before refitting the
        # conformal wrapper on it.
        if len(self.X_.shape) == 1:
            if isinstance(X, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        self.pi.fit(self.X_, self.y_)
        # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
        # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
        preds = self.pi.predict(X, return_pi=True)
        return preds

    # "return_std" not in kwargs
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return (
            self.y_mean_
            + self.obj.predict(
                self.cook_test_set(new_X, **kwargs), **kwargs
            )
        )[0]

    # len(X.shape) > 1
    return self.y_mean_ + self.obj.predict(
        self.cook_test_set(X, **kwargs), **kwargs
    )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
(conformal prediction intervals)
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Evaluation vectors.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            name of a scikit-learn scorer; when None (default),
            the RMSE of self.predict(X) against y is returned.

    Returns:

        score: float

    """
    if scoring is not None:
        # Delegate to the requested scikit-learn scorer.
        return skm2.get_scorer(scoring)(self, X, y)

    # Default metric: root mean squared error of point predictions.
    residuals = self.predict(X) - y
    return np.sqrt(np.mean(residuals ** 2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class CustomBackPropRegressor(Custom, RegressorMixin):
    """
    Finite difference trainer for nnetsauce models.

    The hidden-layer weights ``W_`` of a fitted ``ns.CustomRegressor``
    are refined by gradient-based optimization of a training loss with
    an optional elastic net penalty.

    Parameters
    ----------
    base_model : object
        The base model estimator (e.g., ``RidgeCV()``), passed to
        ``ns.CustomRegressor``.
    type_grad : {'finitediff', 'autodiff'}, optional
        Type of gradient computation to use (default='finitediff').
    lr : float, optional
        Learning rate for optimization (default=1e-4).
    optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional
        Optimization algorithm: gradient descent ('gd'), stochastic
        gradient descent ('sgd'), Adam ('adam'), or coordinate
        descent ('cd'). Default is 'gd'.
    eps : float, optional
        Scaling factor for adaptive finite difference step size
        (default=1e-3).
    batch_size : int, optional
        Batch size for 'sgd' optimizer (default=32).
    alpha : float, optional
        Elastic net penalty strength (default=0.0).
    l1_ratio : float, optional
        Elastic net mixing parameter (0 = Ridge, 1 = Lasso,
        default=0.0).
    type_loss : {'mse', 'quantile'}, optional
        Type of loss function to use (default='mse').
    q : float, optional
        Quantile for quantile loss (default=0.5).
    backend : str, optional
        "cpu", "gpu" or "tpu" (default="cpu").
    **kwargs
        Additional parameters passed to the underlying model.
    """

    def __init__(
        self,
        base_model,
        type_grad="finitediff",
        lr=1e-4,
        optimizer="gd",
        eps=1e-3,
        batch_size=32,
        alpha=0.0,
        l1_ratio=0.0,
        type_loss="mse",
        q=0.5,
        backend="cpu",
        **kwargs,
    ):
        super().__init__(base_model, True, **kwargs)
        self.base_model = base_model
        self.custom_kwargs = kwargs
        self.backend = backend
        self.model = ns.CustomRegressor(
            self.base_model, backend=self.backend, **self.custom_kwargs
        )
        assert isinstance(
            self.model, ns.CustomRegressor
        ), "'model' must be of class ns.CustomRegressor"
        self.type_grad = type_grad
        self.lr = lr
        self.optimizer = optimizer
        self.eps = eps
        # Fix: loss_history_ was initialized twice in the original.
        self.loss_history_ = []  # one loss value per training epoch
        self.opt_state = None  # Adam moment estimates, lazily created
        self.batch_size = batch_size  # for SGD
        self._cd_index = 0  # next coordinate for coordinate descent
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.type_loss = type_loss
        self.q = q

    def _loss(self, X, y, **kwargs):
        """
        Compute the loss (with elastic net penalty) for the current model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.
        y : array-like of shape (n_samples,)
            Target values.
        **kwargs
            Additional keyword arguments for loss calculation.

        Returns
        -------
        float
            The computed loss value.
        """
        y_pred = self.model.predict(X)
        if self.type_loss == "mse":
            loss = np.mean((y - y_pred) ** 2)
        elif self.type_loss == "quantile":
            loss = mean_pinball_loss(y, y_pred, alpha=self.q, **kwargs)
        else:
            # Fix: the original left `loss` unbound here (NameError).
            raise ValueError(f"Unsupported type_loss: {self.type_loss}")
        # Elastic net penalty on the hidden-layer weights.
        W = self.model.W_
        l1 = np.sum(np.abs(W))
        l2 = np.sum(W**2)
        return loss + self.alpha * (
            self.l1_ratio * l1 + 0.5 * (1 - self.l1_ratio) * l2
        )

    def _compute_grad(self, X, y):
        """
        Compute the gradient of the loss with respect to W_ using
        central finite differences.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        ndarray
            Gradient array with the same shape as W_.
        """
        # Work on a copy so the model can be restored afterwards.
        W = deepcopy(self.model.W_)
        shape = W.shape
        W_flat = W.flatten()
        n_params = W_flat.size

        # Adaptive step: scale with parameter magnitude.
        h_vec = self.eps * np.maximum(1.0, np.abs(W_flat))
        # Fix: removed the unused n_params x n_params identity matrix
        # (`np.eye(n_params)`) allocated here by the original.

        loss_plus = np.zeros(n_params)
        loss_minus = np.zeros(n_params)

        for i in range(n_params):
            h_i = h_vec[i]
            Wp = W_flat.copy()
            Wp[i] += h_i
            Wm = W_flat.copy()
            Wm[i] -= h_i

            self.model.W_ = Wp.reshape(shape)
            loss_plus[i] = self._loss(X, y)

            self.model.W_ = Wm.reshape(shape)
            loss_minus[i] = self._loss(X, y)

        # Central difference.
        grad = ((loss_plus - loss_minus) / (2 * h_vec)).reshape(shape)

        # Elastic net (sub)gradient.
        l1_grad = self.alpha * self.l1_ratio * np.sign(W)
        l2_grad = self.alpha * (1 - self.l1_ratio) * W
        grad += l1_grad + l2_grad

        self.model.W_ = W  # restore original weights
        return grad

    def fit(
        self,
        X,
        y,
        epochs=10,
        verbose=True,
        show_progress=True,
        sample_weight=None,
        **kwargs,
    ):
        """
        Fit the model using finite difference optimization.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.
        epochs : int, optional
            Number of optimization steps (default=10).
        verbose : bool, optional
            Whether to print progress messages (default=True).
        show_progress : bool, optional
            Whether to show tqdm progress bar (default=True).
        sample_weight : array-like, optional
            Sample weights (used in a final weighted refit).
        **kwargs
            Additional keyword arguments passed to the final refit.

        Returns
        -------
        self : object
            Returns self.
        """
        # Initial fit provides a starting value for W_.
        self.model.fit(X, y)

        iterator = tqdm(range(epochs)) if show_progress else range(epochs)

        for epoch in iterator:
            grad = self._compute_grad(X, y)

            if self.optimizer == "gd":
                self.model.W_ -= self.lr * grad
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "sgd":
                # Recompute the gradient on a random mini-batch.
                n_samples = X.shape[0]
                idxs = np.random.choice(
                    n_samples, self.batch_size, replace=False
                )
                if isinstance(X, pd.DataFrame):
                    X_batch = X.iloc[idxs, :]
                else:
                    X_batch = X[idxs, :]
                y_batch = y[idxs]
                grad = self._compute_grad(X_batch, y_batch)

                self.model.W_ -= self.lr * grad
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "adam":
                if self.opt_state is None:
                    self.opt_state = {
                        "m": np.zeros_like(grad),
                        "v": np.zeros_like(grad),
                        "t": 0,
                    }
                beta1, beta2, eps = 0.9, 0.999, 1e-8
                self.opt_state["t"] += 1
                self.opt_state["m"] = (
                    beta1 * self.opt_state["m"] + (1 - beta1) * grad
                )
                self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                    1 - beta2
                ) * (grad**2)
                # Bias-corrected moment estimates.
                m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
                v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

                self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
                self.model.W_ = np.clip(self.model.W_, 0, 1)

            elif self.optimizer == "cd":  # coordinate descent
                W_shape = self.model.W_.shape
                W_flat_size = self.model.W_.size
                W_flat = self.model.W_.flatten()
                grad_flat = grad.flatten()

                # Update a single coordinate per epoch (cyclic order).
                idx = self._cd_index % W_flat_size
                W_flat[idx] -= self.lr * grad_flat[idx]
                W_flat[idx] = np.clip(W_flat[idx], 0, 1)

                self.model.W_ = W_flat.reshape(W_shape)

                self._cd_index += 1

            else:
                raise ValueError(f"Unsupported optimizer: {self.optimizer}")

            loss = self._loss(X, y)
            self.loss_history_.append(loss)

            if verbose:
                print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

        # Optional final weighted refit of the underlying model.
        # Fix: the original tested `sample_weight in kwargs`, which is
        # False for None and raises TypeError for array weights; test
        # against None instead. The duplicated trailing `return self`
        # was unreachable and is removed.
        if sample_weight is not None:
            # NOTE(review): indexes weights by self.index_row_ —
            # presumably the training row subsample; TODO confirm.
            self.model.fit(
                X,
                y,
                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs,
            )

        return self

    def predict(self, X, level=95, method="splitconformal", **kwargs):
        """
        Predict using the trained model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data.
        level : int, optional
            Level of confidence for prediction intervals (default=95).
        method : {'splitconformal', 'localconformal'}, optional
            Method for conformal prediction (default='splitconformal').
        **kwargs
            Additional keyword arguments. Use `return_pi=True` for
            prediction intervals, or `return_std=True` for standard
            deviation estimates.

        Returns
        -------
        array or tuple
            Model predictions, or a named tuple with prediction
            intervals or standard deviations if requested.
        """
        if "return_std" in kwargs:
            # Gaussian prediction band at the requested confidence level.
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)

            if len(X.shape) == 1:
                # Single observation: pad with a dummy row of ones so
                # the pipeline receives 2D input; keep row 0 only.
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )

                mean_, std_ = self.model.predict(new_X, return_std=True)[0]

                preds = mean_
                lower = mean_ - pi_multiplier * std_
                upper = mean_ + pi_multiplier * std_

                DescribeResults = namedtuple(
                    "DescribeResults", ["mean", "std", "lower", "upper"]
                )

                return DescribeResults(preds, std_, lower, upper)

            # len(X.shape) > 1
            mean_, std_ = self.model.predict(X, return_std=True)

            preds = mean_
            lower = mean_ - pi_multiplier * std_
            upper = mean_ + pi_multiplier * std_

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            # type_pi / replications / kernel are inherited attributes —
            # presumably set by the parent class; TODO confirm.
            self.pi = ns.PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            # Promote stored 1D training data to 2D before refitting.
            if len(self.X_.shape) == 1:
                if isinstance(X, pd.DataFrame):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.columns
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            self.pi.fit(self.X_, self.y_)
            # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
            # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
            preds = self.pi.predict(X, return_pi=True)
            return preds

        # "return_std" not in kwargs
        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            # Fix: dropped the redundant `0 +` of the original.
            return self.model.predict(new_X, **kwargs)[0]

        # len(X.shape) > 1
        return self.model.predict(X, **kwargs)
Finite difference trainer for nnetsauce models.
Parameters
base_model : object The base model estimator (e.g., `RidgeCV()`), passed to `ns.CustomRegressor`.
type_grad : {'finitediff', 'autodiff'}, optional Type of gradient computation to use (default='finitediff').
lr : float, optional Learning rate for optimization (default=1e-4).
optimizer : {'gd', 'sgd', 'adam', 'cd'}, optional Optimization algorithm: gradient descent ('gd'), stochastic gradient descent ('sgd'), Adam ('adam'), or coordinate descent ('cd'). Default is 'gd'.
eps : float, optional Scaling factor for adaptive finite difference step size (default=1e-3).
batch_size : int, optional Batch size for 'sgd' optimizer (default=32).
alpha : float, optional Elastic net penalty strength (default=0.0).
l1_ratio : float, optional Elastic net mixing parameter (0 = Ridge, 1 = Lasso, default=0.0).
type_loss : {'mse', 'quantile'}, optional Type of loss function to use (default='mse').
q : float, optional Quantile for quantile loss (default=0.5).
**kwargs Additional parameters to pass to the scikit-learn model.
def fit(
    self,
    X,
    y,
    epochs=10,
    verbose=True,
    show_progress=True,
    sample_weight=None,
    **kwargs,
):
    """
    Fit the model using finite difference optimization.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data.
    y : array-like of shape (n_samples,)
        Target values.
    epochs : int, optional
        Number of optimization steps (default=10).
    verbose : bool, optional
        Whether to print progress messages (default=True).
    show_progress : bool, optional
        Whether to show tqdm progress bar (default=True).
    sample_weight : array-like, optional
        Sample weights (used in a final weighted refit).
    **kwargs
        Additional keyword arguments passed to the final refit.

    Returns
    -------
    self : object
        Returns self.
    """
    # Initial fit provides a starting value for W_.
    self.model.fit(X, y)

    iterator = tqdm(range(epochs)) if show_progress else range(epochs)

    for epoch in iterator:
        grad = self._compute_grad(X, y)

        if self.optimizer == "gd":
            self.model.W_ -= self.lr * grad
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "sgd":
            # Recompute the gradient on a random mini-batch.
            n_samples = X.shape[0]
            idxs = np.random.choice(
                n_samples, self.batch_size, replace=False
            )
            if isinstance(X, pd.DataFrame):
                X_batch = X.iloc[idxs, :]
            else:
                X_batch = X[idxs, :]
            y_batch = y[idxs]
            grad = self._compute_grad(X_batch, y_batch)

            self.model.W_ -= self.lr * grad
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "adam":
            if self.opt_state is None:
                self.opt_state = {
                    "m": np.zeros_like(grad),
                    "v": np.zeros_like(grad),
                    "t": 0,
                }
            beta1, beta2, eps = 0.9, 0.999, 1e-8
            self.opt_state["t"] += 1
            self.opt_state["m"] = (
                beta1 * self.opt_state["m"] + (1 - beta1) * grad
            )
            self.opt_state["v"] = beta2 * self.opt_state["v"] + (
                1 - beta2
            ) * (grad**2)
            # Bias-corrected moment estimates.
            m_hat = self.opt_state["m"] / (1 - beta1 ** self.opt_state["t"])
            v_hat = self.opt_state["v"] / (1 - beta2 ** self.opt_state["t"])

            self.model.W_ -= self.lr * m_hat / (np.sqrt(v_hat) + eps)
            self.model.W_ = np.clip(self.model.W_, 0, 1)

        elif self.optimizer == "cd":  # coordinate descent
            W_shape = self.model.W_.shape
            W_flat_size = self.model.W_.size
            W_flat = self.model.W_.flatten()
            grad_flat = grad.flatten()

            # Update a single coordinate per epoch (cyclic order).
            idx = self._cd_index % W_flat_size
            W_flat[idx] -= self.lr * grad_flat[idx]
            W_flat[idx] = np.clip(W_flat[idx], 0, 1)

            self.model.W_ = W_flat.reshape(W_shape)

            self._cd_index += 1

        else:
            raise ValueError(f"Unsupported optimizer: {self.optimizer}")

        loss = self._loss(X, y)
        self.loss_history_.append(loss)

        if verbose:
            print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

    # Optional final weighted refit of the underlying model.
    # Fix: the original tested `sample_weight in kwargs`, which is
    # False for None and raises TypeError for array weights; test
    # against None instead. The duplicated trailing `return self`
    # was unreachable and is removed.
    if sample_weight is not None:
        # NOTE(review): indexes weights by self.index_row_ —
        # presumably the training row subsample; TODO confirm.
        self.model.fit(
            X,
            y,
            sample_weight=sample_weight[self.index_row_].ravel(),
            **kwargs,
        )

    return self
Fit the model using finite difference optimization.
Parameters
X : array-like of shape (n_samples, n_features) Training data.
y : array-like of shape (n_samples,) Target values.
epochs : int, optional Number of optimization steps (default=10).
verbose : bool, optional Whether to print progress messages (default=True).
show_progress : bool, optional Whether to show tqdm progress bar (default=True).
sample_weight : array-like, optional Sample weights.
**kwargs Additional keyword arguments.
Returns
self : object Returns self.
def predict(self, X, level=95, method="splitconformal", **kwargs):
    """
    Predict using the trained model.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.
    level : int, optional
        Level of confidence for prediction intervals (default=95).
    method : {'splitconformal', 'localconformal'}, optional
        Method for conformal prediction (default='splitconformal').
    **kwargs
        Additional keyword arguments. Use `return_pi=True` for
        prediction intervals, or `return_std=True` for standard
        deviation estimates.

    Returns
    -------
    array or tuple
        Model predictions, or a named tuple with prediction intervals
        or standard deviations if requested.
    """
    if "return_std" in kwargs:
        # Gaussian prediction band at the requested confidence level.
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)

        if len(X.shape) == 1:
            # Single observation: pad with a dummy row of ones so the
            # pipeline receives 2D input; keep row 0 only.
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            mean_, std_ = self.model.predict(new_X, return_std=True)[0]

            preds = mean_
            lower = mean_ - pi_multiplier * std_
            upper = mean_ + pi_multiplier * std_

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        # len(X.shape) > 1
        mean_, std_ = self.model.predict(X, return_std=True)

        preds = mean_
        lower = mean_ - pi_multiplier * std_
        upper = mean_ + pi_multiplier * std_

        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        return DescribeResults(preds, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        # type_pi / replications / kernel are inherited attributes —
        # presumably set by the parent class; TODO confirm.
        self.pi = ns.PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        # Promote stored 1D training data to 2D before refitting.
        if len(self.X_.shape) == 1:
            if isinstance(X, pd.DataFrame):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.columns
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        self.pi.fit(self.X_, self.y_)
        # self.X_ = None # consumes memory to keep, dangerous to delete (side effect)
        # self.y_ = None # consumes memory to keep, dangerous to delete (side effect)
        preds = self.pi.predict(X, return_pi=True)
        return preds

    # "return_std" not in kwargs
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        # Fix: dropped the redundant `0 +` of the original.
        return self.model.predict(new_X, **kwargs)[0]

    # len(X.shape) > 1
    return self.model.predict(X, **kwargs)
Predict using the trained model.
Parameters
X : array-like of shape (n_samples, n_features) Input data.
level : int, optional Level of confidence for prediction intervals (default=95).
method : {'splitconformal', 'localconformal'}, optional Method for conformal prediction (default='splitconformal').
**kwargs
Additional keyword arguments. Use return_pi=True for prediction intervals,
or return_std=True for standard deviation estimates.
Returns
array or tuple Model predictions, or a tuple with prediction intervals or standard deviations if requested.
class DeepClassifier(CustomClassifier, ClassifierMixin):
    """
    Deep Classifier: a stack of `n_layers` quasi-randomized layers
    wrapped around a base learner.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        n_layers: int (default=3)
            Number of layers. `n_layers = 1` is a simple `CustomClassifier`

        verbose : int, optional (default=0)
            Monitor progress when fitting.

    All the other parameters are nnetsauce `CustomClassifier`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegressionCV
        data = load_breast_cancer()
        X = data.data
        y = data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = LogisticRegressionCV()
        clf = ns.DeepClassifier(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```
    """

    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=3,
        verbose=0,
        # CustomClassifier attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        # NOTE: these attributes were previously assigned twice in a row
        # (a leftover of a commented-out CalibratedClassifierCV wrapper);
        # assign them exactly once.
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method
        self.obj = obj

        assert n_layers >= 1, "must have n_layers >= 1"
        self.stacked_obj = obj
        self.verbose = verbose
        self.n_layers = n_layers
        self.classes_ = None
        self.n_classes_ = None

    def fit(self, X, y, **kwargs):
        """Fit Classification algorithms to X and y.
        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target class labels, of shape (n_samples,).
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        self.classes_ = np.unique(y)
        self.n_classes_ = len(
            self.classes_
        )  # for compatibility with scikit-learn

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # init layer: wrap the current model in one quasi-randomized layer
        self.stacked_obj = CustomClassifier(
            obj=self.stacked_obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=None,
            calibration_method=None,
            seed=self.seed,
            backend=self.backend,
        )

        if self.verbose > 0:
            iterator = tqdm(range(self.n_layers - 1))
        else:
            iterator = range(self.n_layers - 1)

        # remaining (n_layers - 1) layers, each wrapping the previous stack
        for _ in iterator:
            self.stacked_obj = deepcopy(
                CustomClassifier(
                    obj=self.stacked_obj,
                    n_hidden_features=self.n_hidden_features,
                    activation_name=self.activation_name,
                    a=self.a,
                    nodes_sim=self.nodes_sim,
                    bias=self.bias,
                    dropout=self.dropout,
                    direct_link=self.direct_link,
                    n_clusters=self.n_clusters,
                    cluster_encode=self.cluster_encode,
                    type_clust=self.type_clust,
                    type_scaling=self.type_scaling,
                    col_sample=self.col_sample,
                    row_sample=self.row_sample,
                    cv_calibration=None,
                    calibration_method=None,
                    seed=self.seed,
                    backend=self.backend,
                )
            )
        self.stacked_obj.fit(X, y, **kwargs)

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally fit the stacked classification layers to X and y.
        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target class labels, of shape (n_samples,).
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.
        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        for _ in range(self.n_layers):
            # best-effort: walk down the stack, updating each layer's base
            # learner on the layer-transformed input; layers that cannot be
            # partially fitted are skipped (deliberate silent skip)
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError:
                pass
        return self

    def predict(self, X):
        """Predict class labels for X using the fitted stack."""
        return self.stacked_obj.predict(X)

    def predict_proba(self, X):
        """Predict class probabilities for X using the fitted stack."""
        return self.stacked_obj.predict_proba(X)

    def score(self, X, y, scoring=None):
        """Score the fitted stack on (X, y); `scoring` defaults to accuracy."""
        return self.stacked_obj.score(X, y, scoring)

    def cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_obj=None,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=2,
        seed=123,
        **kwargs,
    ):
        """Cross-validation function and hyperparameters' search

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, of shape (n_samples,).

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, of shape (n_samples,).

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_obj: an object;
                An ML model for estimating the uncertainty around the objective function

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

            **kwargs: dict
                additional parameters to be passed to the estimator

        Examples:

            ```python
            ```
        """

        # integer-coded categorical hyperparameters (the optimizer works on
        # continuous boxes; codes are decoded with int(np.ceil(...)))
        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
        num_to_type_clust = {1: "kmeans", 2: "gmm"}

        def deepclassifier_cv(
            X_train,
            y_train,
            # Defining depth
            n_layers=3,
            # CustomClassifier attributes
            n_hidden_features=5,
            activation_name="relu",
            nodes_sim="sobol",
            dropout=0,
            n_clusters=2,
            type_clust="kmeans",
            cv=5,
            n_jobs=None,
            scoring="accuracy",
            seed=123,
        ):
            # negative CV score: the optimizer minimizes
            self.set_params(
                **{
                    "n_layers": n_layers,
                    # CustomClassifier attributes
                    "n_hidden_features": n_hidden_features,
                    "activation_name": activation_name,
                    "nodes_sim": nodes_sim,
                    "dropout": dropout,
                    "n_clusters": n_clusters,
                    "type_clust": type_clust,
                    **kwargs,
                }
            )
            return -cross_val_score(
                estimator=self,
                X=X_train,
                y=y_train,
                scoring=scoring,
                cv=cv,
                n_jobs=n_jobs,
                verbose=0,
            ).mean()

        # objective function for hyperparams tuning
        def crossval_objective(xx):
            return deepclassifier_cv(
                X_train=X_train,
                y_train=y_train,
                # Defining depth
                n_layers=int(np.ceil(xx[0])),
                # CustomClassifier attributes
                n_hidden_features=int(np.ceil(xx[1])),
                # int() cast added for consistency with the other decodes
                activation_name=num_to_activation_name[int(np.ceil(xx[2]))],
                nodes_sim=num_to_nodes_sim[int(np.ceil(xx[3]))],
                dropout=xx[4],
                n_clusters=int(np.ceil(xx[5])),
                type_clust=num_to_type_clust[int(np.ceil(xx[6]))],
                cv=cv,
                n_jobs=n_jobs,
                scoring=scoring,
                seed=seed,
            )

        # search-space definition shared by both optimizer configurations
        gp_opt_kwargs = dict(
            objective_func=crossval_objective,
            lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
            upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
            params_names=[
                "n_layers",
                # CustomClassifier attributes
                "n_hidden_features",
                "activation_name",
                "nodes_sim",
                "dropout",
                "n_clusters",
                "type_clust",
            ],
            n_init=n_init,
            n_iter=n_iter,
            seed=seed,
        )

        if surrogate_obj is None:
            gp_opt = gp.GPOpt(method="bayesian", **gp_opt_kwargs)
        else:
            gp_opt = gp.GPOpt(
                acquisition="ucb",
                method="splitconformal",
                surrogate_obj=ns.PredictionInterval(
                    obj=surrogate_obj, method="splitconformal"
                ),
                **gp_opt_kwargs,
            )

        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)
        # decode the continuous optimum back to model hyperparameters
        res.best_params["n_layers"] = int(np.ceil(res.best_params["n_layers"]))
        res.best_params["n_hidden_features"] = int(
            np.ceil(res.best_params["n_hidden_features"])
        )
        res.best_params["activation_name"] = num_to_activation_name[
            int(np.ceil(res.best_params["activation_name"]))
        ]
        res.best_params["nodes_sim"] = num_to_nodes_sim[
            int(np.ceil(res.best_params["nodes_sim"]))
        ]
        # `dropout` is continuous: no decoding needed
        res.best_params["n_clusters"] = int(
            np.ceil(res.best_params["n_clusters"])
        )
        res.best_params["type_clust"] = num_to_type_clust[
            int(np.ceil(res.best_params["type_clust"]))
        ]

        # out-of-sample error
        if X_test is not None and y_test is not None:
            self.set_params(**res.best_params, verbose=0, seed=seed)
            preds = self.fit(X_train, y_train).predict(X_test)
            # check error on y_test
            oos_err = getattr(metrics, scoring + "_score")(
                y_true=y_test, y_pred=preds
            )
            result = namedtuple("result", res._fields + ("test_" + scoring,))
            return result(*res, oos_err)
        else:
            return res

    def lazy_cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_objs=None,
        customize=False,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=1,
        seed=123,
    ):
        """Automated Cross-validation function and hyperparameters' search using multiple surrogates

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, of shape (n_samples,).

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, of shape (n_samples,).

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_objs: object names as a list of strings;
                ML models for estimating the uncertainty around the objective function

            customize: boolean
                if True, the surrogate is transformed into a quasi-randomized network (default is False)

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

        Examples:

            ```python
            ```
        """

        # regressors that cannot serve as surrogates (multi-output,
        # cross-decomposition, or otherwise incompatible estimators)
        removed_regressors = [
            "TheilSenRegressor",
            "ARDRegression",
            "CCA",
            "GaussianProcessRegressor",
            "GradientBoostingRegressor",
            "HistGradientBoostingRegressor",
            "IsotonicRegression",
            "MultiOutputRegressor",
            "MultiTaskElasticNet",
            "MultiTaskElasticNetCV",
            "MultiTaskLasso",
            "MultiTaskLassoCV",
            "OrthogonalMatchingPursuit",
            "OrthogonalMatchingPursuitCV",
            "PLSCanonical",
            "PLSRegression",
            "RadiusNeighborsRegressor",
            "RegressorChain",
            "StackingRegressor",
            "VotingRegressor",
        ]

        results = []

        for est_name, est_cls in all_estimators():
            # eligibility: scikit-learn regressor, not excluded, and (when a
            # shortlist is given) explicitly requested
            if not issubclass(est_cls, RegressorMixin):
                continue
            if est_name in removed_regressors:
                continue
            if surrogate_objs is not None and est_name not in surrogate_objs:
                continue
            try:
                if customize:
                    surr_obj = ns.CustomClassifier(obj=est_cls())
                else:
                    surr_obj = est_cls()
                res = self.cross_val_optim(
                    X_train=X_train,
                    y_train=y_train,
                    X_test=X_test,
                    y_test=y_test,
                    surrogate_obj=surr_obj,
                    cv=cv,
                    n_jobs=n_jobs,
                    scoring=scoring,
                    n_init=n_init,
                    n_iter=n_iter,
                    abs_tol=abs_tol,
                    verbose=verbose,
                    seed=seed,
                )
                if customize:
                    results.append((f"CustomClassifier({est_name})", res))
                else:
                    results.append((est_name, res))
            except Exception:
                # deliberate best-effort sweep: skip surrogates that fail
                # (previously a bare `except:`, narrowed to Exception)
                continue

        return results

    @property
    def _estimator_type(self):
        return "classifier"
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
144 def fit(self, X, y, **kwargs): 145 """Fit Classification algorithms to X and y. 146 Parameters 147 ---------- 148 X : array-like, 149 Training vectors, where rows is the number of samples 150 and columns is the number of features. 151 y : array-like, 152 Training vectors, where rows is the number of samples 153 and columns is the number of features. 154 **kwargs: dict 155 Additional parameters to be passed to the fit method 156 of the base learner. For example, `sample_weight`. 157 158 Returns 159 ------- 160 A fitted object 161 """ 162 163 self.classes_ = np.unique(y) 164 self.n_classes_ = len( 165 self.classes_ 166 ) # for compatibility with scikit-learn 167 168 if isinstance(X, np.ndarray): 169 X = pd.DataFrame(X) 170 171 # init layer 172 self.stacked_obj = CustomClassifier( 173 obj=self.stacked_obj, 174 n_hidden_features=self.n_hidden_features, 175 activation_name=self.activation_name, 176 a=self.a, 177 nodes_sim=self.nodes_sim, 178 bias=self.bias, 179 dropout=self.dropout, 180 direct_link=self.direct_link, 181 n_clusters=self.n_clusters, 182 cluster_encode=self.cluster_encode, 183 type_clust=self.type_clust, 184 type_scaling=self.type_scaling, 185 col_sample=self.col_sample, 186 row_sample=self.row_sample, 187 cv_calibration=None, 188 calibration_method=None, 189 seed=self.seed, 190 backend=self.backend, 191 ) 192 193 if self.verbose > 0: 194 iterator = tqdm(range(self.n_layers - 1)) 195 else: 196 iterator = range(self.n_layers - 1) 197 198 for _ in iterator: 199 self.stacked_obj = deepcopy( 200 CustomClassifier( 201 obj=self.stacked_obj, 202 n_hidden_features=self.n_hidden_features, 203 activation_name=self.activation_name, 204 a=self.a, 205 nodes_sim=self.nodes_sim, 206 bias=self.bias, 207 dropout=self.dropout, 208 direct_link=self.direct_link, 209 n_clusters=self.n_clusters, 210 cluster_encode=self.cluster_encode, 211 type_clust=self.type_clust, 212 type_scaling=self.type_scaling, 213 col_sample=self.col_sample, 214 row_sample=self.row_sample, 215 
cv_calibration=None, 216 calibration_method=None, 217 seed=self.seed, 218 backend=self.backend, 219 ) 220 ) 221 self.stacked_obj.fit(X, y, **kwargs) 222 223 return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target class labels, of shape (n_samples,).
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class DeepRegressor(CustomRegressor, RegressorMixin):
    """
    Deep Regressor: a stack of `n_layers` quasi-randomized layers
    wrapped around a base learner.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        n_layers: int (default=2)
            Number of layers. `n_layers = 1` is a simple `CustomRegressor`

    All the other parameters are nnetsauce `CustomRegressor`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import RidgeCV
        data = load_diabetes()
        X = data.data
        y = data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = RidgeCV()
        clf = ns.DeepRegressor(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```

    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=2,
        verbose=0,
        # CustomRegressor attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method="splitconformal",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            level=level,
            pi_method=pi_method,
            seed=seed,
            backend=backend,
        )

        assert n_layers >= 1, "must have n_layers >= 1"

        # deepcopy so that refits do not mutate the caller's base learner
        self.stacked_obj = deepcopy(obj)
        self.verbose = verbose
        self.n_layers = n_layers
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit Regression algorithms to X and y.
        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, of shape (n_samples,).
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.
        Returns
        -------
        A fitted object
        """

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # init layer: wrap the current model in one quasi-randomized layer
        self.stacked_obj = CustomRegressor(
            obj=self.stacked_obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

        if self.verbose > 0:
            iterator = tqdm(range(self.n_layers - 1))
        else:
            iterator = range(self.n_layers - 1)

        # remaining (n_layers - 1) layers, each wrapping the previous stack
        for _ in iterator:
            self.stacked_obj = deepcopy(
                CustomRegressor(
                    obj=self.stacked_obj,
                    n_hidden_features=self.n_hidden_features,
                    activation_name=self.activation_name,
                    a=self.a,
                    nodes_sim=self.nodes_sim,
                    bias=self.bias,
                    dropout=self.dropout,
                    direct_link=self.direct_link,
                    n_clusters=self.n_clusters,
                    cluster_encode=self.cluster_encode,
                    type_clust=self.type_clust,
                    type_scaling=self.type_scaling,
                    col_sample=self.col_sample,
                    row_sample=self.row_sample,
                    seed=self.seed,
                    backend=self.backend,
                )
            )

        self.stacked_obj.fit(X, y, **kwargs)

        # optionally wrap the fitted stack for conformal prediction intervals
        if self.level is not None:
            self.stacked_obj = PredictionInterval(
                obj=self.stacked_obj, method=self.pi_method, level=self.level
            )

        # surface the inner stack's fitted attributes on this estimator
        # (replaces a repetitive chain of hasattr/assignment statements)
        for attr_name in (
            "clustering_obj_",
            "coef_",
            "scaler_",
            "nn_scaler_",
            "clustering_scaler_",
        ):
            if hasattr(self.stacked_obj, attr_name):
                setattr(self, attr_name, getattr(self.stacked_obj, attr_name))

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally fit the stacked regression layers to X and y.
        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, of shape (n_samples,).
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.
        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        current_obj = self.stacked_obj
        for _ in range(self.n_layers):
            # best-effort: walk down the stack, updating each layer's base
            # learner; layers that cannot be partially fitted are reported
            # and skipped
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError as e:
                print(e)
                pass
        return self

    def predict(self, X, **kwargs):
        """Predict with the fitted stack; returns prediction intervals
        when `level` was set at construction time."""
        if self.level is not None:
            return self.stacked_obj.predict(X, return_pi=True)
        return self.stacked_obj.predict(X, **kwargs)

    def score(self, X, y, scoring=None):
        """Score the fitted stack on (X, y)."""
        return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=2)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
104 def fit(self, X, y, **kwargs): 105 """Fit Regression algorithms to X and y. 106 Parameters 107 ---------- 108 X : array-like, 109 Training vectors, where rows is the number of samples 110 and columns is the number of features. 111 y : array-like, 112 Training vectors, where rows is the number of samples 113 and columns is the number of features. 114 **kwargs: dict 115 Additional parameters to be passed to the fit method 116 of the base learner. For example, `sample_weight`. 117 Returns 118 ------- 119 A fitted object 120 """ 121 122 if isinstance(X, np.ndarray): 123 X = pd.DataFrame(X) 124 125 # init layer 126 self.stacked_obj = CustomRegressor( 127 obj=self.stacked_obj, 128 n_hidden_features=self.n_hidden_features, 129 activation_name=self.activation_name, 130 a=self.a, 131 nodes_sim=self.nodes_sim, 132 bias=self.bias, 133 dropout=self.dropout, 134 direct_link=self.direct_link, 135 n_clusters=self.n_clusters, 136 cluster_encode=self.cluster_encode, 137 type_clust=self.type_clust, 138 type_scaling=self.type_scaling, 139 col_sample=self.col_sample, 140 row_sample=self.row_sample, 141 seed=self.seed, 142 backend=self.backend, 143 ) 144 145 if self.verbose > 0: 146 iterator = tqdm(range(self.n_layers - 1)) 147 else: 148 iterator = range(self.n_layers - 1) 149 150 for _ in iterator: 151 self.stacked_obj = deepcopy( 152 CustomRegressor( 153 obj=self.stacked_obj, 154 n_hidden_features=self.n_hidden_features, 155 activation_name=self.activation_name, 156 a=self.a, 157 nodes_sim=self.nodes_sim, 158 bias=self.bias, 159 dropout=self.dropout, 160 direct_link=self.direct_link, 161 n_clusters=self.n_clusters, 162 cluster_encode=self.cluster_encode, 163 type_clust=self.type_clust, 164 type_scaling=self.type_scaling, 165 col_sample=self.col_sample, 166 row_sample=self.row_sample, 167 seed=self.seed, 168 backend=self.backend, 169 ) 170 ) 171 172 self.stacked_obj.fit(X, y, **kwargs) 173 174 if self.level is not None: 175 self.stacked_obj = PredictionInterval( 176 
obj=self.stacked_obj, method=self.pi_method, level=self.level 177 ) 178 179 if hasattr(self.stacked_obj, "clustering_obj_"): 180 self.clustering_obj_ = self.stacked_obj.clustering_obj_ 181 182 if hasattr(self.stacked_obj, "coef_"): 183 self.coef_ = self.stacked_obj.coef_ 184 185 if hasattr(self.stacked_obj, "scaler_"): 186 self.scaler_ = self.stacked_obj.scaler_ 187 188 if hasattr(self.stacked_obj, "nn_scaler_"): 189 self.nn_scaler_ = self.stacked_obj.nn_scaler_ 190 191 if hasattr(self.stacked_obj, "clustering_scaler_"): 192 self.clustering_scaler_ = self.stacked_obj.clustering_scaler_ 193 194 return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, of shape (n_samples,).
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
228 def predict(self, X, **kwargs): 229 if self.level is not None: 230 return self.stacked_obj.predict(X, return_pi=True) 231 return self.stacked_obj.predict(X, **kwargs)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegressor`,
`sklearn.gaussian_process.GaussianProcessRegressor`)`
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class DeepMTS(MTS):
    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

    Parameters:

        obj: object.
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict()).

        n_layers: int.
            number of layers in the neural network.

        n_hidden_features: int.
            number of nodes in the hidden layer.

        activation_name: str.
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

        a: float.
            hyperparameter for 'prelu' or 'elu' activation function.

        nodes_sim: str.
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'.

        bias: boolean.
            indicates if the hidden layer contains a bias term (True) or not
            (False).

        dropout: float.
            regularization parameter; (random) percentage of nodes dropped out
            of the training.

        direct_link: boolean.
            indicates if the original predictors are included (True) in model's fitting or not (False).

        n_clusters: int.
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

        cluster_encode: bool.
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding.

        type_clust: str.
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm').

        type_scaling: a tuple of 3 strings.
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax').

        lags: int.
            number of lags used for each time series.

        type_pi: str.
            type of prediction interval; currently:
            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
            - "kde": based on Kernel Density Estimation of in-sample residuals
            - "bootstrap": based on independent bootstrap of in-sample residuals
            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

        block_size: int.
            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
            Default is round(3.15*(n_residuals^(1/3)))

        replications: int.
            number of replications (if needed, for predictive simulation). Default is 'None'.

        kernel: str.
            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

        agg: str.
            either "mean" or "median" for simulation of bootstrap aggregating

        seed: int.
            reproducibility seed for nodes_sim=='uniform' or predictive simulation.

        backend: str.
            "cpu" or "gpu" or "tpu".

        verbose: int.
            0: not printing; 1: printing

        show_progress: bool.
            True: progress bar when fitting each series; False: no progress bar when fitting each series

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        y_: {array-like}
            DeepMTS responses (most recent observations first)

        X_: {array-like}
            DeepMTS lags

        xreg_: {array-like}
            external regressors

        y_means_: dict
            a dictionary of each series mean values

        preds_: {array-like}
            successive model predictions

        preds_std_: {array-like}
            standard deviation around the predictions

        return_std_: boolean
            return uncertainty or not (set in predict)

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

    Examples:

    Example 1:

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn import linear_model
    np.random.seed(123)

    M = np.random.rand(10, 3)
    M[:,0] = 10*M[:,0]
    M[:,2] = 25*M[:,2]
    print(M)

    # Adjust Bayesian Ridge
    regr4 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(M)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))
    ```

    Example 2:

    ```python
    import nnetsauce as ns
    import numpy as np
    import pandas as pd
    from sklearn import linear_model

    dataset = {
    'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
    'series1' : [34, 30, 35.6, 33.3, 38.1],
    'series2' : [4, 5.5, 5.6, 6.3, 5.1],
    'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
    df = pd.DataFrame(dataset).set_index('date')
    print(df)

    # Adjust Bayesian Ridge
    regr5 = linear_model.BayesianRidge()
    obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
    obj_DeepMTS.fit(df)
    print(obj_DeepMTS.predict())

    # with credible intervals
    print(obj_DeepMTS.predict(return_std=True, level=80))

    print(obj_DeepMTS.predict(return_std=True, level=95))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_layers=3,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=1,
        type_pi="kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        verbose=0,
        show_progress=True,
    ):
        # Validate inputs with explicit exceptions rather than `assert`
        # (asserts are stripped when Python runs with -O).
        if int(lags) != lags:
            raise ValueError("parameter 'lags' should be an integer")
        if n_layers < 1:
            raise ValueError("must have n_layers >= 1")
        self.n_layers = int(n_layers)

        # Build the "deep" part: wrap the base learner in (n_layers - 1)
        # successive CustomRegressor layers, each one taking the previous
        # stack as its base estimator. deepcopy keeps each layer's base
        # learner independent of the caller's object.
        if self.n_layers > 1:
            for _ in range(self.n_layers - 1):
                obj = CustomRegressor(
                    obj=deepcopy(obj),
                    n_hidden_features=n_hidden_features,
                    activation_name=activation_name,
                    a=a,
                    nodes_sim=nodes_sim,
                    bias=bias,
                    dropout=dropout,
                    direct_link=direct_link,
                    n_clusters=n_clusters,
                    cluster_encode=cluster_encode,
                    type_clust=type_clust,
                    type_scaling=type_scaling,
                    seed=seed,
                    backend=backend,
                )

        self.obj = deepcopy(obj)
        super().__init__(
            obj=self.obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            seed=seed,
            backend=backend,
            verbose=verbose,
            show_progress=show_progress,
        )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^(1/3)))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
import pandas as pd
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
class DiscreteTokenMTS(MTS):
    """
    MTS for discrete token forecasting via nearest-neighbor in embedding space.

    Maps continuous predictions to discrete tokens using nearest-neighbor lookup
    in a vocabulary (embedding space). Supports probabilistic decoding with
    temperature-controlled softmax and uncertainty quantification in token space.

    Parameters
    ----------
    obj : object
        Base learner with fit() and predict() methods

    vocab : np.ndarray of shape (vocab_size, n_series)
        Token vocabulary - each row is a token embedding vector

    metric : {'euclidean', 'cosine'}, default='euclidean'
        Distance metric for nearest-neighbor lookup

    return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id'
        Output format:
        - 'token_id': integer token indices
        - 'token_vector': token embedding vectors
        - 'both': single DataFrame with token_id + dimensions
        - 'probs': probability distribution over all tokens

    softmax_temperature : float, default=1.0
        Temperature for softmax when return_mode='probs'
        Lower values (0.1-0.5) → sharper distributions (more deterministic)
        Higher values (2.0-10.0) → smoother distributions (more exploratory)

    normalize_vocab : bool, default=False
        Whether to center and scale vocabulary to zero mean, unit variance

    **mts_kwargs : dict
        Additional parameters passed to MTS base class

    Attributes
    ----------
    vocab : np.ndarray
        Normalized vocabulary (if normalize_vocab=True)

    vocab_mean_ : np.ndarray
        Mean used for normalization (if normalize_vocab=True)

    vocab_std_ : np.ndarray
        Std used for normalization (if normalize_vocab=True)

    discretization_errors_ : pd.DataFrame or None
        Distances from predictions to nearest tokens

    Warnings
    --------
    - Prediction intervals (lower/upper) are NOT discretized - only the mean
    - For uncertainty in token space, use predict_token_distribution()
    - Vocabulary quality strongly affects results - use diagnose_vocabulary()

    Examples
    --------
    >>> # Basic token prediction
    >>> vocab = np.random.randn(100, 10)  # 100 tokens, 10 dimensions
    >>> model = DiscreteTokenMTS(
    ...     obj=Ridge(),
    ...     vocab=vocab,
    ...     lags=5,
    ...     return_mode='token_id'
    ... )
    >>> model.fit(X_train)
    >>> tokens = model.predict(h=10)

    >>> # Probabilistic with temperature control
    >>> model = DiscreteTokenMTS(
    ...     obj=Ridge(),
    ...     vocab=vocab,
    ...     lags=5,
    ...     return_mode='probs',
    ...     softmax_temperature=1.5
    ... )
    >>> probs = model.predict(h=10)  # Returns probability distributions

    >>> # Uncertainty-aware token distributions
    >>> freqs, entropy, mode = model.predict_token_distribution(
    ...     h=10,
    ...     replications=100
    ... )
    """

    def __init__(
        self,
        obj,
        vocab,
        metric="euclidean",
        return_mode="token_id",
        softmax_temperature=1.0,
        normalize_vocab=False,
        **mts_kwargs,
    ):
        super().__init__(obj, **mts_kwargs)

        # Convert and validate vocabulary
        self.vocab_original = np.asarray(vocab, dtype=np.float64)
        self._validate_vocabulary()

        self.vocab_size = self.vocab_original.shape[0]
        self.vocab_mean_ = None
        self.vocab_std_ = None
        self.normalize_vocab = normalize_vocab

        # Normalize if requested
        if normalize_vocab:
            self._normalize_vocabulary()
        else:
            self.vocab = self.vocab_original.copy()

        # Validate and set metric.
        # Raise ValueError instead of assert: asserts are stripped under -O,
        # and the rest of this class already raises ValueError for bad input.
        if metric not in ("euclidean", "cosine"):
            raise ValueError("metric must be 'euclidean' or 'cosine'")
        self.metric = metric
        self.distance_func = (
            euclidean_distances if metric == "euclidean" else cosine_distances
        )

        # Validate and set return mode
        if return_mode not in ("token_id", "token_vector", "both", "probs"):
            raise ValueError(
                "return_mode must be 'token_id', 'token_vector', 'both', or 'probs'"
            )
        self.return_mode = return_mode

        # Validate temperature
        if not softmax_temperature > 0:
            raise ValueError("softmax_temperature must be positive")
        self.softmax_temperature = softmax_temperature

        # Initialize error tracking
        self.discretization_errors_ = None

    def _validate_vocabulary(self):
        """Comprehensive vocabulary validation.

        Raises ValueError for structurally invalid vocabularies; emits
        UserWarning for quality issues (duplicates, near-duplicates).
        """
        # Check shape (ValueError rather than assert; see __init__)
        if self.vocab_original.ndim != 2:
            raise ValueError("vocab must be 2D array (vocab_size, n_series)")
        if self.vocab_original.shape[0] == 0:
            raise ValueError("vocab must have at least one token")

        # Check for NaN/Inf
        if np.any(np.isnan(self.vocab_original)) or np.any(
            np.isinf(self.vocab_original)
        ):
            raise ValueError("Vocabulary contains NaN or Inf values")

        # Check for duplicates
        unique_rows = np.unique(self.vocab_original, axis=0)
        if len(unique_rows) < len(self.vocab_original):
            n_duplicates = len(self.vocab_original) - len(unique_rows)
            warnings.warn(
                f"Vocabulary contains {n_duplicates} duplicate vectors. "
                "This reduces effective vocabulary size.",
                UserWarning,
            )

        # Check for near-duplicates (O(V^2) pairwise distances; fine for
        # typical vocabulary sizes, may be slow for very large vocabularies)
        if len(self.vocab_original) > 1:
            dists = euclidean_distances(self.vocab_original)
            np.fill_diagonal(dists, np.inf)
            min_dist = dists.min()

            if min_dist < 1e-6:
                warnings.warn(
                    f"Vocabulary contains very close vectors (min distance: {min_dist:.2e}). "
                    "Consider increasing token diversity.",
                    UserWarning,
                )

    def _normalize_vocabulary(self):
        """Center and scale vocabulary"""
        self.vocab_mean_ = self.vocab_original.mean(axis=0)
        # epsilon avoids division by zero on constant dimensions
        self.vocab_std_ = self.vocab_original.std(axis=0) + 1e-8
        self.vocab = (self.vocab_original - self.vocab_mean_) / self.vocab_std_

    def fit(self, X, **kwargs):
        """
        Fit model and validate vocabulary dimensions match data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_series)
            Training data

        **kwargs : dict
            Additional parameters passed to parent fit

        Returns
        -------
        self : object
            Fitted estimator
        """
        # Call parent fit
        super().fit(X, **kwargs)

        # Validate vocabulary dimensions
        n_series = X.shape[1] if X.ndim > 1 else 1
        if self.vocab.shape[1] != n_series:
            raise ValueError(
                f"Vocabulary dimension ({self.vocab.shape[1]}) must match "
                f"number of series ({n_series})"
            )

        # Additional check for cosine distance
        if self.metric == "cosine":
            norms = np.linalg.norm(self.vocab, axis=1)
            zero_vectors = norms < 1e-10
            if np.any(zero_vectors):
                raise ValueError(
                    f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. "
                    "Cosine distance requires non-zero vectors."
                )

        return self

    def _vectorized_map_to_tokens(self, continuous_preds):
        """
        Vectorized token mapping for efficiency.

        Parameters
        ----------
        continuous_preds : np.ndarray of shape (h, n_series)
            Continuous predictions

        Returns
        -------
        result : depends on return_mode
        errors : np.ndarray
            Distances to nearest tokens
        """
        # Normalize predictions if vocabulary was normalized
        if self.normalize_vocab:
            continuous_preds = (
                continuous_preds - self.vocab_mean_
            ) / self.vocab_std_

        # Compute all distances at once
        dists = self.distance_func(continuous_preds, self.vocab)

        # Find nearest tokens
        nearest_indices = np.argmin(dists, axis=1)
        min_dists = dists[np.arange(len(dists)), nearest_indices]

        if self.return_mode == "token_id":
            return nearest_indices, min_dists

        elif self.return_mode == "token_vector":
            token_vecs = self.vocab[nearest_indices]
            # Denormalize if vocabulary was normalized
            if self.normalize_vocab:
                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
            return token_vecs, min_dists

        elif self.return_mode == "both":
            # Return combined array: [token_id, dim_0, dim_1, ...]
            token_ids = nearest_indices.reshape(-1, 1)
            token_vecs = self.vocab[nearest_indices]
            # Denormalize if vocabulary was normalized
            if self.normalize_vocab:
                token_vecs = token_vecs * self.vocab_std_ + self.vocab_mean_
            combined = np.column_stack([token_ids, token_vecs])
            return combined, min_dists

        elif self.return_mode == "probs":
            # Softmax of negative distances
            probs = softmax(-dists / self.softmax_temperature, axis=1)
            return probs, min_dists

    def predict(
        self,
        h=5,
        level=95,
        quantiles=None,
        return_discretization_error=False,
        **kwargs,
    ):
        """
        Generate discrete token predictions.

        Parameters
        ----------
        h : int, default=5
            Forecast horizon

        level : int, default=95
            Confidence level (only affects continuous forecasts)

        quantiles : list of float, optional
            Quantile levels

        return_discretization_error : bool, default=False
            If True, return (predictions, errors) tuple

        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        predictions : pd.DataFrame
            Discrete predictions. Format depends on return_mode:
            - 'token_id': single column 'token_id'
            - 'token_vector': columns 'dim_0', 'dim_1', ...
            - 'both': columns 'token_id', 'dim_0', 'dim_1', ...
            - 'probs': columns 'token_0_prob', 'token_1_prob', ...

        errors : pd.DataFrame (if return_discretization_error=True)
            Discretization errors (distances to nearest tokens)

        Warnings
        --------
        When prediction intervals are requested but only mean is discretized,
        a warning is issued. Use predict_token_distribution() for uncertainty
        in token space.
        """
        # Get continuous predictions from parent
        continuous_result = super().predict(
            h=h, level=level, quantiles=quantiles, **kwargs
        )

        # Robust type detection using duck typing
        if hasattr(continuous_result, "_fields"):  # Namedtuple
            if (
                hasattr(continuous_result, "sims")
                and continuous_result.sims is not None
            ):
                # Simulation-based forecast
                return self._discretize_simulations(
                    continuous_result.sims, return_discretization_error
                )
            elif hasattr(continuous_result, "mean"):
                # Interval-based forecast - warn about information loss
                warnings.warn(
                    "Prediction intervals cannot be meaningfully discretized. "
                    "Only mean predictions are converted to tokens. "
                    "Use predict_token_distribution(replications=N) for "
                    "uncertainty in token space.",
                    UserWarning,
                )
                return self._discretize_dataframe(
                    continuous_result.mean, return_discretization_error
                )
        elif isinstance(continuous_result, pd.DataFrame):
            # Deterministic forecast
            return self._discretize_dataframe(
                continuous_result, return_discretization_error
            )
        else:
            raise NotImplementedError(
                f"Unhandled predict output type: {type(continuous_result)}"
            )

    def _discretize_dataframe(self, df, return_error=False):
        """Discretize a continuous prediction DataFrame"""
        # Use vectorized mapping
        result, errors = self._vectorized_map_to_tokens(df.values)

        # Always return single DataFrame (even for 'both' mode)
        if self.return_mode == "probs":
            result_df = pd.DataFrame(
                result,
                index=df.index,
                columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
            )
        elif self.return_mode == "both":
            # Combined format: token_id + dimensions
            columns = ["token_id"] + [
                f"dim_{i}" for i in range(self.vocab.shape[1])
            ]
            result_df = pd.DataFrame(result, index=df.index, columns=columns)
            result_df["token_id"] = result_df["token_id"].astype(int)
        elif self.return_mode == "token_id":
            result_df = pd.DataFrame(
                result.reshape(-1, 1), index=df.index, columns=["token_id"]
            )
        else:  # 'token_vector'
            result_df = pd.DataFrame(
                result,
                index=df.index,
                columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
            )

        if return_error:
            error_df = pd.DataFrame(
                errors.reshape(-1, 1),
                index=df.index,
                columns=["discretization_error"],
            )
            self.discretization_errors_ = error_df
            return result_df, error_df

        return result_df

    def _discretize_simulations(self, sims, return_error=False):
        """Discretize simulation paths"""
        discrete_sims = []
        all_errors = []

        for sim_df in sims:
            result, errors = self._vectorized_map_to_tokens(sim_df.values)

            if self.return_mode == "probs":
                discrete_df = pd.DataFrame(
                    result,
                    index=sim_df.index,
                    columns=[f"token_{i}_prob" for i in range(self.vocab_size)],
                )
            elif self.return_mode == "both":
                columns = ["token_id"] + [
                    f"dim_{i}" for i in range(self.vocab.shape[1])
                ]
                discrete_df = pd.DataFrame(
                    result, index=sim_df.index, columns=columns
                )
                discrete_df["token_id"] = discrete_df["token_id"].astype(int)
            elif self.return_mode == "token_id":
                discrete_df = pd.DataFrame(
                    result.reshape(-1, 1),
                    index=sim_df.index,
                    columns=["token_id"],
                )
            else:  # 'token_vector'
                discrete_df = pd.DataFrame(
                    result,
                    index=sim_df.index,
                    columns=[f"dim_{i}" for i in range(self.vocab.shape[1])],
                )

            discrete_sims.append(discrete_df)

            if return_error:
                error_df = pd.DataFrame(
                    errors.reshape(-1, 1),
                    index=sim_df.index,
                    columns=["discretization_error"],
                )
                all_errors.append(error_df)

        if return_error:
            return tuple(discrete_sims), tuple(all_errors)
        return tuple(discrete_sims)

    # ========== Uncertainty Quantification in Token Space ==========

    def predict_top_k(self, h=5, k=5, **kwargs):
        """
        Predict top-k most probable tokens per timestep.

        Parameters
        ----------
        h : int
            Forecast horizon
        k : int
            Number of top tokens to return
        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        predictions : pd.DataFrame
            Columns: token_1, prob_1, token_2, prob_2, ..., token_k, prob_k
            (token_* columns are integer token ids, prob_* columns are floats)
        """
        continuous_result = super().predict(h=h, **kwargs)

        # Handle different return types
        if hasattr(continuous_result, "mean"):
            preds = continuous_result.mean.values
            index = continuous_result.mean.index
        elif isinstance(continuous_result, pd.DataFrame):
            preds = continuous_result.values
            index = continuous_result.index
        else:
            raise ValueError("Cannot extract continuous predictions")

        # Compute probabilities
        dists = self.distance_func(preds, self.vocab)
        probs = softmax(-dists / self.softmax_temperature, axis=1)

        # Get top-k (argsort ascending, take last k, reverse → descending prob)
        top_k_indices = np.argsort(probs, axis=1)[:, -k:][:, ::-1]
        top_k_probs = np.take_along_axis(probs, top_k_indices, axis=1)

        # Format as DataFrame
        columns = []
        data = []
        for i in range(k):
            columns.extend([f"token_{i+1}", f"prob_{i+1}"])
            data.append(top_k_indices[:, i])
            data.append(top_k_probs[:, i])

        result_df = pd.DataFrame(
            np.column_stack(data), index=index, columns=columns
        )
        # FIX: np.column_stack upcasts the integer token ids to float when
        # mixed with probabilities; restore integer dtype on the token
        # columns, consistent with the astype(int) used in 'both' mode.
        for i in range(k):
            result_df[f"token_{i+1}"] = result_df[f"token_{i+1}"].astype(int)

        return result_df

    def predict_token_distribution(self, h=5, replications=100, **kwargs):
        """
        Generate token probability distribution from simulation ensemble.

        This method provides meaningful uncertainty quantification in token space
        by discretizing multiple simulation paths and computing token frequencies.

        Parameters
        ----------
        h : int
            Forecast horizon
        replications : int
            Number of simulation paths
        **kwargs : dict
            Additional parameters for parent predict

        Returns
        -------
        frequencies : pd.DataFrame
            Token frequencies across simulations
            Columns: token_0_freq, token_1_freq, ..., token_V_freq

        entropy : pd.Series
            Shannon entropy per timestep (uncertainty measure)

        mode_tokens : pd.DataFrame
            Most frequent token per timestep

        Examples
        --------
        >>> freqs, entropy, mode = model.predict_token_distribution(h=10, replications=100)
        >>> # High entropy → uncertain prediction
        >>> uncertain_steps = entropy[entropy > 2.0]
        >>> # Use mode tokens for point predictions
        >>> predictions = mode['mode_token'].values
        """
        # Force simulation mode
        kwargs["replications"] = replications
        continuous_result = super().predict(h=h, **kwargs)

        # Extract simulations
        if (
            hasattr(continuous_result, "sims")
            and continuous_result.sims is not None
        ):
            sims = continuous_result.sims
            index = continuous_result.mean.index
        else:
            raise ValueError(
                "predict_token_distribution requires simulation-based forecasting. "
                "Ensure replications > 0 and type_pi supports simulations."
            )

        # Discretize all paths
        all_tokens = []
        for sim in sims:
            tokens, _ = self._vectorized_map_to_tokens(sim.values)
            if self.return_mode == "probs":
                # For probs mode, get argmax token
                tokens = np.argmax(tokens, axis=1)
            elif self.return_mode == "both":
                # Extract token_id column
                tokens = tokens[:, 0].astype(int)
            elif self.return_mode == "token_vector":
                # Map back to token IDs
                dists = self.distance_func(tokens, self.vocab)
                tokens = np.argmin(dists, axis=1)
            # else: token_id mode, already correct

            all_tokens.append(tokens)

        all_tokens = np.array(all_tokens)  # (replications, h)

        # Compute frequency distribution
        # NOTE(review): frequencies are divided by the requested
        # `replications`; assumes the parent returns exactly that many
        # paths — TODO confirm against MTS.predict.
        h_actual = all_tokens.shape[1]
        token_freqs = np.zeros((h_actual, self.vocab_size))

        for t in range(h_actual):
            unique, counts = np.unique(all_tokens[:, t], return_counts=True)
            token_freqs[t, unique] = counts / replications

        # Compute entropy (epsilon avoids log(0))
        epsilon = 1e-10
        entropy = -np.sum(token_freqs * np.log(token_freqs + epsilon), axis=1)

        # Get mode
        mode_tokens = np.argmax(token_freqs, axis=1)

        # Package results
        freq_df = pd.DataFrame(
            token_freqs,
            index=index,
            columns=[f"token_{i}_freq" for i in range(self.vocab_size)],
        )

        entropy_series = pd.Series(entropy, index=index, name="entropy")

        mode_df = pd.DataFrame(mode_tokens, index=index, columns=["mode_token"])

        return freq_df, entropy_series, mode_df

    # ========== Utility Methods ==========

    def tokens_to_vectors(self, token_ids):
        """Convert token IDs to embedding vectors (in original scale)"""
        token_ids = np.asarray(token_ids).astype(int)
        if not np.all((token_ids >= 0) & (token_ids < self.vocab_size)):
            raise ValueError(
                f"Token IDs must be in range [0, {self.vocab_size-1}]"
            )
        vectors = self.vocab[token_ids]
        # Denormalize if vocabulary was normalized
        if self.normalize_vocab:
            vectors = vectors * self.vocab_std_ + self.vocab_mean_
        return vectors

    def get_token_neighbors(self, token_id, k=5):
        """Find k nearest neighbors of a token"""
        if not 0 <= token_id < self.vocab_size:
            raise ValueError(
                f"token_id must be in range [0, {self.vocab_size-1}]"
            )

        token_vec = self.vocab[token_id].reshape(1, -1)
        dists = self.distance_func(token_vec, self.vocab).flatten()

        # Drop the token itself before taking the k closest
        sorted_indices = np.argsort(dists)
        sorted_indices = sorted_indices[sorted_indices != token_id][:k]

        return pd.DataFrame(
            {"neighbor_id": sorted_indices, "distance": dists[sorted_indices]}
        )

    def compute_vocab_coverage(self, predictions):
        """Compute vocabulary usage statistics"""
        if "token_id" not in predictions.columns:
            raise ValueError("predictions must have 'token_id' column")

        token_ids = predictions["token_id"].values
        unique_tokens = np.unique(token_ids)
        freq = pd.Series(token_ids).value_counts().sort_index()

        return {
            "unique_tokens": len(unique_tokens),
            "coverage_pct": 100 * len(unique_tokens) / self.vocab_size,
            "token_frequencies": freq,
            "most_common_token": freq.idxmax() if len(freq) > 0 else None,
            "least_common_token": freq.idxmin() if len(freq) > 0 else None,
        }

    def diagnose_vocabulary(self):
        """
        Comprehensive vocabulary quality diagnostics.

        Returns
        -------
        report : dict
            Quality metrics including distances, condition number, coverage
        """
        # Use original vocabulary for diagnostics to get meaningful statistics
        vocab_to_diagnose = self.vocab_original

        report = {
            "vocab_size": self.vocab_size,
            "embedding_dim": vocab_to_diagnose.shape[1],
            "normalized": self.normalize_vocab,
        }

        # Pairwise distances
        dists = euclidean_distances(vocab_to_diagnose)
        np.fill_diagonal(dists, np.inf)

        report["min_pairwise_distance"] = dists.min()
        report["max_pairwise_distance"] = dists.max()
        report["mean_pairwise_distance"] = dists[dists != np.inf].mean()

        # Condition number
        U, s, Vt = np.linalg.svd(vocab_to_diagnose, full_matrices=False)
        report["condition_number"] = s.max() / (s.min() + 1e-10)

        # Coverage volume
        ranges = vocab_to_diagnose.max(axis=0) - vocab_to_diagnose.min(axis=0)
        report["coverage_volume"] = np.prod(ranges)

        # Duplicates
        unique_rows = np.unique(vocab_to_diagnose, axis=0)
        report["duplicate_count"] = len(vocab_to_diagnose) - len(unique_rows)

        return report

    def print_vocabulary_report(self):
        """Print human-readable vocabulary diagnostics"""
        report = self.diagnose_vocabulary()

        print("=" * 60)
        print("VOCABULARY QUALITY REPORT")
        print("=" * 60)
        print(f"Vocabulary size: {report['vocab_size']} tokens")
        print(f"Embedding dimension: {report['embedding_dim']}")
        print(f"\nPairwise Distances:")
        print(f"  Min:  {report['min_pairwise_distance']:.6f}")
        print(f"  Mean: {report['mean_pairwise_distance']:.6f}")
        print(f"  Max:  {report['max_pairwise_distance']:.6f}")
        print(f"\nVocabulary Health:")
        print(f"  Condition number: {report['condition_number']:.2f}")
        if report["condition_number"] > 1000:
            print(
                "  ⚠️ WARNING: High condition number may indicate redundant tokens"
            )
        print(f"  Duplicate tokens: {report['duplicate_count']}")
        if report["duplicate_count"] > 0:
            print("  ⚠️ WARNING: Duplicates reduce effective vocabulary size")
        print(f"  Coverage volume: {report['coverage_volume']:.2e}")
        print("=" * 60)
MTS for discrete token forecasting via nearest-neighbor in embedding space.
Maps continuous predictions to discrete tokens using nearest-neighbor lookup in a vocabulary (embedding space). Supports probabilistic decoding with temperature-controlled softmax and uncertainty quantification in token space.
Parameters
obj : object Base learner with fit() and predict() methods
vocab : np.ndarray of shape (vocab_size, n_series) Token vocabulary - each row is a token embedding vector
metric : {'euclidean', 'cosine'}, default='euclidean' Distance metric for nearest-neighbor lookup
return_mode : {'token_id', 'token_vector', 'both', 'probs'}, default='token_id' Output format: - 'token_id': integer token indices - 'token_vector': token embedding vectors - 'both': single DataFrame with token_id + dimensions - 'probs': probability distribution over all tokens
softmax_temperature : float, default=1.0 Temperature for softmax when return_mode='probs' Lower values (0.1-0.5) → sharper distributions (more deterministic) Higher values (2.0-10.0) → smoother distributions (more exploratory)
normalize_vocab : bool, default=False Whether to center and scale vocabulary to zero mean, unit variance
**mts_kwargs : dict Additional parameters passed to MTS base class
Attributes
vocab : np.ndarray Normalized vocabulary (if normalize_vocab=True)
vocab_mean_ : np.ndarray Mean used for normalization (if normalize_vocab=True)
vocab_std_ : np.ndarray Std used for normalization (if normalize_vocab=True)
discretization_errors_ : pd.DataFrame or None Distances from predictions to nearest tokens
Warnings
- Prediction intervals (lower/upper) are NOT discretized - only the mean
- For uncertainty in token space, use predict_token_distribution()
- Vocabulary quality strongly affects results - use diagnose_vocabulary()
Examples
>>> # Basic token prediction
>>> vocab = np.random.randn(100, 10) # 100 tokens, 10 dimensions
>>> model = DiscreteTokenMTS(
... obj=Ridge(),
... vocab=vocab,
... lags=5,
... return_mode='token_id'
... )
>>> model.fit(X_train)
>>> tokens = model.predict(h=10)
>>> # Probabilistic with temperature control
>>> model = DiscreteTokenMTS(
... obj=Ridge(),
... vocab=vocab,
... lags=5,
... return_mode='probs',
... softmax_temperature=1.5
... )
>>> probs = model.predict(h=10) # Returns probability distributions
>>> # Uncertainty-aware token distributions
>>> freqs, entropy, mode = model.predict_token_distribution(
... h=10,
... replications=100
... )
197 def fit(self, X, **kwargs): 198 """ 199 Fit model and validate vocabulary dimensions match data. 200 201 Parameters 202 ---------- 203 X : array-like of shape (n_samples, n_series) 204 Training data 205 206 **kwargs : dict 207 Additional parameters passed to parent fit 208 209 Returns 210 ------- 211 self : object 212 Fitted estimator 213 """ 214 # Call parent fit 215 super().fit(X, **kwargs) 216 217 # Validate vocabulary dimensions 218 n_series = X.shape[1] if X.ndim > 1 else 1 219 if self.vocab.shape[1] != n_series: 220 raise ValueError( 221 f"Vocabulary dimension ({self.vocab.shape[1]}) must match " 222 f"number of series ({n_series})" 223 ) 224 225 # Additional check for cosine distance 226 if self.metric == "cosine": 227 norms = np.linalg.norm(self.vocab, axis=1) 228 zero_vectors = norms < 1e-10 229 if np.any(zero_vectors): 230 raise ValueError( 231 f"Vocabulary contains {zero_vectors.sum()} zero/near-zero vectors. " 232 "Cosine distance requires non-zero vectors." 233 ) 234 235 return self
Fit model and validate vocabulary dimensions match data.
Parameters
X : array-like of shape (n_samples, n_series) Training data
**kwargs : dict Additional parameters passed to parent fit
Returns
self : object Fitted estimator
290 def predict( 291 self, 292 h=5, 293 level=95, 294 quantiles=None, 295 return_discretization_error=False, 296 **kwargs, 297 ): 298 """ 299 Generate discrete token predictions. 300 301 Parameters 302 ---------- 303 h : int, default=5 304 Forecast horizon 305 306 level : int, default=95 307 Confidence level (only affects continuous forecasts) 308 309 quantiles : list of float, optional 310 Quantile levels 311 312 return_discretization_error : bool, default=False 313 If True, return (predictions, errors) tuple 314 315 **kwargs : dict 316 Additional parameters for parent predict 317 318 Returns 319 ------- 320 predictions : pd.DataFrame 321 Discrete predictions. Format depends on return_mode: 322 - 'token_id': single column 'token_id' 323 - 'token_vector': columns 'dim_0', 'dim_1', ... 324 - 'both': columns 'token_id', 'dim_0', 'dim_1', ... 325 - 'probs': columns 'token_0_prob', 'token_1_prob', ... 326 327 errors : pd.DataFrame (if return_discretization_error=True) 328 Discretization errors (distances to nearest tokens) 329 330 Warnings 331 -------- 332 When prediction intervals are requested but only mean is discretized, 333 a warning is issued. Use predict_token_distribution() for uncertainty 334 in token space. 335 """ 336 # Get continuous predictions from parent 337 continuous_result = super().predict( 338 h=h, level=level, quantiles=quantiles, **kwargs 339 ) 340 341 # FIXED: Robust type detection using duck typing 342 if hasattr(continuous_result, "_fields"): # Namedtuple 343 if ( 344 hasattr(continuous_result, "sims") 345 and continuous_result.sims is not None 346 ): 347 # Simulation-based forecast 348 return self._discretize_simulations( 349 continuous_result.sims, return_discretization_error 350 ) 351 elif hasattr(continuous_result, "mean"): 352 # Interval-based forecast - warn about information loss 353 warnings.warn( 354 "Prediction intervals cannot be meaningfully discretized. " 355 "Only mean predictions are converted to tokens. 
" 356 "Use predict_token_distribution(replications=N) for " 357 "uncertainty in token space.", 358 UserWarning, 359 ) 360 return self._discretize_dataframe( 361 continuous_result.mean, return_discretization_error 362 ) 363 elif isinstance(continuous_result, pd.DataFrame): 364 # Deterministic forecast 365 return self._discretize_dataframe( 366 continuous_result, return_discretization_error 367 ) 368 else: 369 raise NotImplementedError( 370 f"Unhandled predict output type: {type(continuous_result)}" 371 )
Generate discrete token predictions.
Parameters
h : int, default=5 Forecast horizon
level : int, default=95 Confidence level (only affects continuous forecasts)
quantiles : list of float, optional Quantile levels
return_discretization_error : bool, default=False If True, return (predictions, errors) tuple
**kwargs : dict Additional parameters for parent predict
Returns
predictions : pd.DataFrame Discrete predictions. Format depends on return_mode: - 'token_id': single column 'token_id' - 'token_vector': columns 'dim_0', 'dim_1', ... - 'both': columns 'token_id', 'dim_0', 'dim_1', ... - 'probs': columns 'token_0_prob', 'token_1_prob', ...
errors : pd.DataFrame (if return_discretization_error=True) Discretization errors (distances to nearest tokens)
Warnings
When prediction intervals are requested but only mean is discretized, a warning is issued. Use predict_token_distribution() for uncertainty in token space.
class Downloader:
    """Download datasets from data sources (R-universe for now)"""

    def __init__(self):
        # All attributes record the last download() call's inputs/outputs
        self.pkgname = None
        self.dataset = None
        self.source = None
        self.url = None
        self.request = None

    def download(
        self,
        pkgname="MASS",
        dataset="Boston",
        source="https://cran.r-universe.dev/",
        timeout=30,
        **kwargs
    ):
        """Download datasets from data sources (R-universe for now)

        Args:

            pkgname: str
                name of the R package hosting the dataset

            dataset: str
                name of the dataset inside the package

            source: str
                base URL of the data source

            timeout: float
                seconds to wait for the HTTP response (new, backward-compatible;
                previously the request could hang indefinitely)

            **kwargs: additional parameters passed to pd.DataFrame

        Returns:

            a pandas DataFrame built from the JSON payload

        Examples:

        ```python
        import nnetsauce as ns

        downloader = ns.Downloader()
        df = downloader.download(pkgname="MASS", dataset="Boston")
        ```

        """
        self.pkgname = pkgname
        self.dataset = dataset
        self.source = source
        self.url = source + pkgname + "/data/" + dataset + "/json"
        # FIX: requests.get without a timeout blocks forever on a stalled
        # server; also surface HTTP errors instead of failing later in .json()
        self.request = requests.get(self.url, timeout=timeout)
        self.request.raise_for_status()
        return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
16 def download( 17 self, 18 pkgname="MASS", 19 dataset="Boston", 20 source="https://cran.r-universe.dev/", 21 **kwargs 22 ): 23 """Download datasets from data sources (R-universe for now) 24 25 Examples: 26 27 ```python 28 import nnetsauce as ns 29 30 downloader = ns.Downloader() 31 df = downloader.download(pkgname="MASS", dataset="Boston") 32 ``` 33 34 """ 35 self.pkgname = pkgname 36 self.dataset = dataset 37 self.source = source 38 self.url = source + pkgname + "/data/" + dataset + "/json" 39 self.request = requests.get(self.url) 40 return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class ElasticNet2Regressor(BaseEstimator, RegressorMixin):
    """Randomized-features regressor with Elastic Net + L2-ball constraint.

    Standardized inputs are mapped through a fixed random hidden layer, then
    output coefficients are fitted by projected (proximal) gradient descent:
    a gradient step, an L1 soft-threshold, and a projection of the
    coefficient vector onto the L2 ball of radius `lambd`.

    Parameters:

        n_hidden_features: int
            number of random hidden units

        alpha: float
            regularization strength (scales the L1 threshold and damps the
            gradient step size)

        l1_ratio: float
            compromise between l1 (1.0) and l2 (0.0) regularization

        lambd: float
            radius of the L2 ball the coefficients are projected onto

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        max_iter: int
            maximum number of gradient iterations

        tol: float
            convergence tolerance on the coefficient update norm

        random_state: int or None
            seed for the random hidden-layer weights
    """

    def __init__(
        self,
        n_hidden_features=100,
        alpha=1.0,
        l1_ratio=0.5,
        lambd=0.1,
        activation_name="tanh",
        a=0.01,
        max_iter=1000,
        tol=1e-4,
        random_state=None,
    ):
        self.n_hidden_features = n_hidden_features
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.lambd = lambd
        self.activation_name = activation_name
        self.a = a
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state

    def _activation(self, Z):
        """Apply the configured elementwise activation to Z."""
        if self.activation_name == "relu":
            return np.maximum(0, Z)
        elif self.activation_name == "tanh":
            return np.tanh(Z)
        elif self.activation_name == "sigmoid":
            return 1 / (1 + np.exp(-Z))
        elif self.activation_name == "prelu":
            return np.where(Z > 0, Z, self.a * Z)
        elif self.activation_name == "elu":
            return np.where(Z > 0, Z, self.a * (np.exp(Z) - 1))
        else:
            raise ValueError(f"Unknown activation: {self.activation_name}")

    def fit(self, X, y):
        """Fit the random-features Elastic Net model to (X, y)."""
        X, y = check_X_y(X, y)
        rng = np.random.RandomState(self.random_state)

        # Standardize inputs (epsilon avoids division by zero for
        # constant features)
        self.X_mean_ = X.mean(axis=0)
        self.X_std_ = X.std(axis=0) + 1e-8
        X_scaled = (X - self.X_mean_) / self.X_std_

        # Center response
        self.y_mean_ = y.mean()
        y_centered = y - self.y_mean_

        # Random feature mapping (fixed for the model's lifetime)
        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
        self.b_in_ = rng.randn(self.n_hidden_features)
        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)

        # Doubly-constrained optimization with Elastic Net
        beta = np.zeros(self.n_hidden_features)

        for _ in range(self.max_iter):
            beta_old = beta.copy()

            # Gradient of the (mean) squared error; step size damped by
            # the L2 part of the elastic-net mixture
            grad = H.T @ (H @ beta - y_centered) / len(y)
            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))

            # Soft thresholding (L1 proximal operator)
            beta = beta - step * grad
            threshold = step * self.alpha * self.l1_ratio
            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)

            # L2 projection (constraint): rescale onto the lambd-ball
            norm = np.linalg.norm(beta)
            if norm > self.lambd:
                beta = beta * (self.lambd / norm)

            # Stop once the update is smaller than tol
            if np.linalg.norm(beta - beta_old) < self.tol:
                break

        self.beta_ = beta
        return self

    def predict(self, X):
        """Predict with the fitted random-features model."""
        X = check_array(X)
        X_scaled = (X - self.X_mean_) / self.X_std_
        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)
        return H @ self.beta_ + self.y_mean_
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by
`GridSearchCV` and friends; - textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.
Notes
All estimators should specify all the parameters that can be set
at the class level in their __init__ as explicit keyword
arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
... def __init__(self, *, param=1):
... self.param = param
... def fit(self, X, y=None):
... self.is_fitted_ = True
... return self
... def predict(self, X):
... return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
    def fit(self, X, y):
        """Fit the random-features Elastic Net model to (X, y).

        Builds a fixed random hidden layer, then solves for the output
        coefficients by projected (proximal) gradient descent: gradient
        step, L1 soft-threshold, projection onto the L2 ball of radius
        `lambd`. Returns self.
        """
        X, y = check_X_y(X, y)
        rng = np.random.RandomState(self.random_state)

        # Standardize inputs (epsilon avoids division by zero)
        self.X_mean_ = X.mean(axis=0)
        self.X_std_ = X.std(axis=0) + 1e-8
        X_scaled = (X - self.X_mean_) / self.X_std_

        # Center response
        self.y_mean_ = y.mean()
        y_centered = y - self.y_mean_

        # Random feature mapping (fixed for the model's lifetime)
        self.W_in_ = rng.randn(X.shape[1], self.n_hidden_features)
        self.b_in_ = rng.randn(self.n_hidden_features)
        H = self._activation(X_scaled @ self.W_in_ + self.b_in_)

        # Doubly-constrained optimization with Elastic Net
        beta = np.zeros(self.n_hidden_features)

        for _ in range(self.max_iter):
            beta_old = beta.copy()

            # Gradient descent step with projection; step size damped by
            # the L2 part of the elastic-net mixture
            grad = H.T @ (H @ beta - y_centered) / len(y)
            step = 0.01 / (1 + self.alpha * (1 - self.l1_ratio))

            # Soft thresholding (L1 proximal operator)
            beta = beta - step * grad
            threshold = step * self.alpha * self.l1_ratio
            beta = np.sign(beta) * np.maximum(np.abs(beta) - threshold, 0)

            # L2 projection (constraint): rescale onto the lambd-ball
            norm = np.linalg.norm(beta)
            if norm > self.lambd:
                beta = beta * (self.lambd / norm)

            # Stop once the update is smaller than tol
            if np.linalg.norm(beta - beta_old) < self.tol:
                break

        self.beta_ = beta
        return self
class GLMClassifier(GLM, ClassifierMixin):
    """Generalized 'linear' models using quasi-randomized networks (classification)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromize between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            loss family: 'expit' (default), 'logit' or 'erf'

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer

        backend: str.
            "cpu" or "gpu" or "tpu".

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)

    """

    # construct the object -----
    # NOTE: this class attribute is shadowed by the read-only property of
    # the same name defined at the bottom of the class body.
    _estimator_type = "classifier"

    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="expit",
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        # NOTE: the default Optimizer() instance is created once at class
        # definition time and shared by all instances that don't pass one.
        optimizer=Optimizer(),
        backend="cpu",
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family

    def logit_loss(self, Y, row_index, XB):
        """Negative log-likelihood for the 'logit' family.

        Y is the one-hot encoded response, XB the linear predictor;
        row_index optionally restricts Y to a subsample.
        """
        self.n_classes = Y.shape[1]  # len(np.unique(y))
        # Y = mo.one_hot_encode2(y, self.n_classes)
        # Y = self.optimizer.one_hot_encode(y, self.n_classes)

        # max_double = 709.0 # only if softmax
        # XB[XB > max_double] = max_double
        # in-place clip to avoid overflow in exp (709 ~ log of max double)
        XB[XB > 709.0] = 709.0

        if row_index is None:
            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))

        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))

    def expit_erf_loss(self, Y, row_index, XB):
        """Negative log-likelihood used for the 'expit' and 'erf' families
        (same form as logit_loss but without the overflow clipping)."""
        # self.n_classes = len(np.unique(y))
        # Y = mo.one_hot_encode2(y, self.n_classes)
        # Y = self.optimizer.one_hot_encode(y, self.n_classes)
        self.n_classes = Y.shape[1]

        if row_index is None:
            return -np.mean(np.sum(Y * XB, axis=1) - logsumexp(XB))

        return -np.mean(np.sum(Y[row_index, :] * XB, axis=1) - logsumexp(XB))

    def loss_func(
        self,
        beta,
        group_index,
        X,
        Y,
        y,
        row_index=None,
        type_loss="logit",
        **kwargs
    ):
        """Objective evaluated by the optimizer: family loss + penalty term."""
        # dispatch table: family name -> loss callable
        res = {
            "logit": self.logit_loss,
            "expit": self.expit_erf_loss,
            "erf": self.expit_erf_loss,
        }

        if row_index is None:
            # full-sample evaluation
            row_index = range(len(y))
            XB = self.compute_XB(
                X,
                beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
            )

            return res[type_loss](Y, row_index, XB) + self.compute_penalty(
                group_index=group_index, beta=beta
            )

        # subsampled evaluation (e.g. stochastic optimization)
        XB = self.compute_XB(
            X,
            beta=np.reshape(beta, (X.shape[1], self.n_classes), order="F"),
            row_index=row_index,
        )

        return res[type_loss](Y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(
            y
        ), "y must contain only integers"  # change is_factor and subsampling everywhere

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.beta_ = None

        n, p = X.shape

        # NOTE(review): group_index = n_samples * n_features looks surprising;
        # confirm against compute_penalty's expectations
        self.group_index = n * X.shape[1]

        self.n_classes = len(np.unique(y))

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # Y = mo.one_hot_encode2(output_y, self.n_classes)
        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

        # initialization: least-squares warm start for the optimizer
        if self.backend == "cpu":
            beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
        else:
            beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0]

        # optimization
        # fit(self, loss_func, response, x0, **kwargs):
        # loss_func(self, beta, group_index, X, y,
        #           row_index=None, type_loss="gaussian",
        #           **kwargs)
        self.optimizer.fit(
            self.loss_func,
            response=y,
            x0=beta_.flatten(order="F"),
            group_index=self.group_index,
            X=scaled_Z,
            Y=Y,
            y=y,
            type_loss=self.family,
        )

        self.beta_ = self.optimizer.results[0]
        self.classes_ = np.unique(y)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """
        # hard labels = argmax over per-class probabilities
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """
        if len(X.shape) == 1:
            # single observation: stack a dummy ones-row so the 2D
            # preprocessing pipeline can run
            # NOTE(review): both rows are returned to the caller — confirm
            # callers take row 0
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        # linear scores: Z @ beta reshaped to (n_features_total, n_classes)
        ZB = mo.safe_sparse_dot(
            Z,
            self.beta_.reshape(
                self.n_classes,
                X.shape[1] + self.n_hidden_features + self.n_clusters,
            ).T,
        )

        # per-family link, then row-normalize to probabilities
        if self.family == "logit":
            exp_ZB = np.exp(ZB)

            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

        if self.family == "expit":
            exp_ZB = expit(ZB)

            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

        if self.family == "erf":
            exp_ZB = 0.5 * (1 + erf(ZB))

            return exp_ZB / exp_ZB.sum(axis=1)[:, None]

        # NOTE(review): an unknown family falls through and returns None

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float
        """

        if scoring is None:
            scoring = "accuracy"

        if scoring == "accuracy":
            return skm2.accuracy_score(y, self.predict(X))

        if scoring == "f1":
            return skm2.f1_score(y, self.predict(X))

        if scoring == "precision":
            return skm2.precision_score(y, self.predict(X))

        if scoring == "recall":
            return skm2.recall_score(y, self.predict(X))

        # NOTE(review): roc_auc is computed from hard labels, not
        # probabilities — confirm this is intended
        if scoring == "roc_auc":
            return skm2.roc_auc_score(y, self.predict(X))

        if scoring == "log_loss":
            return skm2.log_loss(y, self.predict_proba(X))

        if scoring == "balanced_accuracy":
            return skm2.balanced_accuracy_score(y, self.predict(X))

        if scoring == "average_precision":
            return skm2.average_precision_score(y, self.predict(X))

        if scoring == "neg_brier_score":
            return -skm2.brier_score_loss(y, self.predict_proba(X))

        if scoring == "neg_log_loss":
            return -skm2.log_loss(y, self.predict_proba(X))

        # NOTE(review): an unknown scoring name falls through and returns None

    @property
    def _estimator_type(self):
        # sklearn compatibility marker (shadows the class attribute above)
        return "classifier"
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromize between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromize between l1 and l2 norm of GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class nnetsauce.Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
213 def fit(self, X, y, **kwargs): 214 """Fit GLM model to training data (X, y). 215 216 Args: 217 218 X: {array-like}, shape = [n_samples, n_features] 219 Training vectors, where n_samples is the number 220 of samples and n_features is the number of features. 221 222 y: array-like, shape = [n_samples] 223 Target values. 224 225 **kwargs: additional parameters to be passed to 226 self.cook_training_set or self.obj.fit 227 228 Returns: 229 230 self: object 231 232 """ 233 234 assert mx.is_factor( 235 y 236 ), "y must contain only integers" # change is_factor and subsampling everywhere 237 238 self.classes_ = np.unique(y) # for compatibility with sklearn 239 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 240 241 self.beta_ = None 242 243 n, p = X.shape 244 245 self.group_index = n * X.shape[1] 246 247 self.n_classes = len(np.unique(y)) 248 249 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 250 251 # Y = mo.one_hot_encode2(output_y, self.n_classes) 252 Y = self.optimizer.one_hot_encode(output_y, self.n_classes) 253 254 # initialization 255 if self.backend == "cpu": 256 beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 257 else: 258 beta_ = jnp.linalg.lstsq(scaled_Z, Y, rcond=None)[0] 259 260 # optimization 261 # fit(self, loss_func, response, x0, **kwargs): 262 # loss_func(self, beta, group_index, X, y, 263 # row_index=None, type_loss="gaussian", 264 # **kwargs) 265 self.optimizer.fit( 266 self.loss_func, 267 response=y, 268 x0=beta_.flatten(order="F"), 269 group_index=self.group_index, 270 X=scaled_Z, 271 Y=Y, 272 y=y, 273 type_loss=self.family, 274 ) 275 276 self.beta_ = self.optimizer.results[0] 277 self.classes_ = np.unique(y) 278 279 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
281 def predict(self, X, **kwargs): 282 """Predict test data X. 283 284 Args: 285 286 X: {array-like}, shape = [n_samples, n_features] 287 Training vectors, where n_samples is the number 288 of samples and n_features is the number of features. 289 290 **kwargs: additional parameters to be passed to 291 self.cook_test_set 292 293 Returns: 294 295 model predictions: {array-like} 296 297 """ 298 299 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
301 def predict_proba(self, X, **kwargs): 302 """Predict probabilities for test data X. 303 304 Args: 305 306 X: {array-like}, shape = [n_samples, n_features] 307 Training vectors, where n_samples is the number 308 of samples and n_features is the number of features. 309 310 **kwargs: additional parameters to be passed to 311 self.cook_test_set 312 313 Returns: 314 315 probability estimates for test data: {array-like} 316 317 """ 318 if len(X.shape) == 1: 319 n_features = X.shape[0] 320 new_X = mo.rbind( 321 X.reshape(1, n_features), 322 np.ones(n_features).reshape(1, n_features), 323 ) 324 325 Z = self.cook_test_set(new_X, **kwargs) 326 327 else: 328 Z = self.cook_test_set(X, **kwargs) 329 330 ZB = mo.safe_sparse_dot( 331 Z, 332 self.beta_.reshape( 333 self.n_classes, 334 X.shape[1] + self.n_hidden_features + self.n_clusters, 335 ).T, 336 ) 337 338 if self.family == "logit": 339 exp_ZB = np.exp(ZB) 340 341 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 342 343 if self.family == "expit": 344 exp_ZB = expit(ZB) 345 346 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 347 348 if self.family == "erf": 349 exp_ZB = 0.5 * (1 + erf(ZB)) 350 351 return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
353 def score(self, X, y, scoring=None): 354 """Scoring function for classification. 355 356 Args: 357 358 X: {array-like}, shape = [n_samples, n_features] 359 Training vectors, where n_samples is the number 360 of samples and n_features is the number of features. 361 362 y: array-like, shape = [n_samples] 363 Target values. 364 365 scoring: str 366 scoring method (default is accuracy) 367 368 Returns: 369 370 score: float 371 """ 372 373 if scoring is None: 374 scoring = "accuracy" 375 376 if scoring == "accuracy": 377 return skm2.accuracy_score(y, self.predict(X)) 378 379 if scoring == "f1": 380 return skm2.f1_score(y, self.predict(X)) 381 382 if scoring == "precision": 383 return skm2.precision_score(y, self.predict(X)) 384 385 if scoring == "recall": 386 return skm2.recall_score(y, self.predict(X)) 387 388 if scoring == "roc_auc": 389 return skm2.roc_auc_score(y, self.predict(X)) 390 391 if scoring == "log_loss": 392 return skm2.log_loss(y, self.predict_proba(X)) 393 394 if scoring == "balanced_accuracy": 395 return skm2.balanced_accuracy_score(y, self.predict(X)) 396 397 if scoring == "average_precision": 398 return skm2.average_precision_score(y, self.predict(X)) 399 400 if scoring == "neg_brier_score": 401 return -skm2.brier_score_loss(y, self.predict_proba(X)) 402 403 if scoring == "neg_log_loss": 404 return -skm2.log_loss(y, self.predict_proba(X))
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class GLMRegressor(GLM, RegressorMixin):
    """Generalized 'linear' models using quasi-randomized networks (regression)

    Attributes:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromise between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            "gaussian", "laplace", "poisson", or "quantile" (for now)

        level: int, default=50
            The level of the quantiles to compute for family = "quantile".
            Default is the median.

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object or None
            optimizer, from class nnetsauce.utils.Optimizer; if None (default),
            a fresh Optimizer() is created for this instance

        backend: str
            "cpu", "gpu" or "tpu"

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="gaussian",
        level=50,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=None,
        backend="cpu",
        seed=123,
    ):
        # Fix: the previous default `optimizer=Optimizer()` was a mutable default
        # argument, evaluated once at class-definition time — every instance built
        # with the default shared one Optimizer object, so `optimizer.results` of
        # one model clobbered another's. `None` + per-instance construction keeps
        # the same default behavior without the cross-instance coupling.
        if optimizer is None:
            optimizer = Optimizer()
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            backend=backend,
            seed=seed,
        )

        self.family = family
        self.level = level
        # quantile level expressed as a probability, consumed by pinball_loss
        self.q = self.level / 100

    def gaussian_loss(self, y, row_index, XB):
        """Half mean squared error between y[row_index] and linear predictor XB."""
        return 0.5 * np.mean(np.square(y[row_index] - XB))

    def laplace_loss(self, y, row_index, XB):
        """Half mean absolute error between y[row_index] and linear predictor XB."""
        return 0.5 * np.mean(np.abs(y[row_index] - XB))

    def poisson_loss(self, y, row_index, XB):
        """Poisson negative log-likelihood (up to constants); XB is the log-rate."""
        return -np.mean(y[row_index] * XB - np.exp(XB))

    def pinball_loss(self, y, row_index, XB, tau=0.5):
        """Pinball (quantile) loss at level `tau` via sklearn's mean_pinball_loss."""
        y = np.array(y[row_index])
        y_pred = np.array(XB)
        return mean_pinball_loss(y, y_pred, alpha=tau)
        # return np.mean(np.maximum(tau * residuals, (tau - 1) * residuals))

    def loss_func(
        self,
        beta,
        group_index,
        X,
        y,
        row_index=None,
        type_loss="gaussian",
        **kwargs
    ):
        """Objective evaluated at `beta`: family loss (optionally on a row subset)
        plus the group penalty — except for the quantile family, which (as in the
        original implementation) is returned unpenalized."""
        res = {
            "gaussian": self.gaussian_loss,
            "laplace": self.laplace_loss,
            "poisson": self.poisson_loss,
            "quantile": self.pinball_loss,
        }

        if type_loss != "quantile":
            if row_index is None:
                row_index = range(len(y))
                XB = self.compute_XB(X, beta=beta)
                return res[type_loss](y, row_index, XB) + self.compute_penalty(
                    group_index=group_index, beta=beta
                )

            XB = self.compute_XB(X, beta=beta, row_index=row_index)
            return res[type_loss](y, row_index, XB) + self.compute_penalty(
                group_index=group_index, beta=beta
            )

        # quantile family
        assert 0 < self.q < 1, "'tau' must satisfy 0 < tau < 1"

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=beta)
            return res[type_loss](y, row_index, XB, self.q)

        XB = self.compute_XB(X, beta=beta, row_index=row_index)
        return res[type_loss](y, row_index, XB, self.q)

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """
        self.beta_ = None
        self.n_iter = 0

        # number of original features, used to group penalty terms
        _, self.group_index = X.shape

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        # initialization: least-squares warm start for the optimizer
        if self.backend == "cpu":
            beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
        else:
            beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]
        # optimization
        # fit(self, loss_func, response, x0, **kwargs):
        # loss_func(self, beta, group_index, X, y,
        #           row_index=None, type_loss="gaussian",
        #           **kwargs)
        self.optimizer.fit(
            self.loss_func,
            response=centered_y,
            x0=beta_,
            group_index=self.group_index,
            X=scaled_Z,
            y=centered_y,
            type_loss=self.family,
            **kwargs
        )

        self.beta_ = self.optimizer.results[0]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}

        """
        if len(X.shape) == 1:
            # single observation: stack a dummy row of ones so the
            # preprocessing pipeline sees a 2D input, then return the
            # first (real) prediction
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
            )[0]

        return self.y_mean_ + np.dot(
            self.cook_test_set(X, **kwargs), self.beta_
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method; if None, RMSE is returned

        Returns:

            score: float

        """
        if scoring is None:
            # default metric: root mean squared error
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Attributes:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace", "poisson", or "quantile" (for now)
level: int, default=50
The level of the quantiles to compute for family = "quantile".
Default is the median.
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class Optimizer
backend: str.
"cpu" or "gpu" or "tpu".
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu", "gpu", "tpu"
Attributes:
beta_: vector
regression coefficients
Examples:
See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)
221 def fit(self, X, y, **kwargs): 222 """Fit GLM model to training data (X, y). 223 224 Args: 225 226 X: {array-like}, shape = [n_samples, n_features] 227 Training vectors, where n_samples is the number 228 of samples and n_features is the number of features. 229 230 y: array-like, shape = [n_samples] 231 Target values. 232 233 **kwargs: additional parameters to be passed to 234 self.cook_training_set or self.obj.fit 235 236 Returns: 237 238 self: object 239 240 """ 241 self.beta_ = None 242 self.n_iter = 0 243 244 _, self.group_index = X.shape 245 246 centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 247 # initialization 248 if self.backend == "cpu": 249 beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 250 else: 251 beta_ = jnp.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0] 252 # optimization 253 # fit(self, loss_func, response, x0, **kwargs): 254 # loss_func(self, beta, group_index, X, y, 255 # row_index=None, type_loss="gaussian", 256 # **kwargs) 257 self.optimizer.fit( 258 self.loss_func, 259 response=centered_y, 260 x0=beta_, 261 group_index=self.group_index, 262 X=scaled_Z, 263 y=centered_y, 264 type_loss=self.family, 265 **kwargs 266 ) 267 268 self.beta_ = self.optimizer.results[0] 269 270 return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
272 def predict(self, X, **kwargs): 273 """Predict test data X. 274 275 Args: 276 277 X: {array-like}, shape = [n_samples, n_features] 278 Training vectors, where n_samples is the number 279 of samples and n_features is the number of features. 280 281 **kwargs: additional parameters to be passed to 282 self.cook_test_set 283 284 Returns: 285 286 model predictions: {array-like} 287 288 """ 289 290 if len(X.shape) == 1: 291 n_features = X.shape[0] 292 new_X = mo.rbind( 293 X.reshape(1, n_features), 294 np.ones(n_features).reshape(1, n_features), 295 ) 296 297 return ( 298 self.y_mean_ 299 + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_) 300 )[0] 301 302 return self.y_mean_ + np.dot( 303 self.cook_test_set(X, **kwargs), self.beta_ 304 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
306 def score(self, X, y, scoring=None): 307 """Compute the score of the model. 308 309 Parameters: 310 311 X: {array-like}, shape = [n_samples, n_features] 312 Training vectors, where n_samples is the number 313 of samples and n_features is the number of features. 314 315 y: array-like, shape = [n_samples] 316 Target values. 317 318 scoring: str 319 scoring method 320 321 Returns: 322 323 score: float 324 325 """ 326 327 if scoring is None: 328 return np.sqrt(np.mean((self.predict(X) - y) ** 2)) 329 330 return skm2.get_scorer(scoring)(self, X, y)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class KernelRidge(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.

    Parameters:
    - alpha: float
        Regularization parameter. May also be a list/array of candidate values;
        `fit` then selects the one with the smallest leave-one-out error (LOOE).
    - kernel: str
        Kernel type ("linear", "rbf", or "matern").
    - gamma: float
        Kernel coefficient for "rbf". Ignored for other kernels.
    - nu: float
        Smoothness parameter for the Matérn kernel. Default is 1.5.
    - length_scale: float
        Length scale parameter for the Matérn kernel. Default is 1.0.
    - backend: str
        "cpu" or "gpu" (uses JAX if "gpu").
    """

    def __init__(
        self,
        alpha=1.0,
        kernel="rbf",
        gamma=None,
        nu=1.5,
        length_scale=1.0,
        backend="cpu",
    ):
        # Fail fast if a JAX-backed backend is requested but JAX is missing
        if not JAX_AVAILABLE and backend != "cpu":
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )
        self.alpha = alpha
        # alpha_ starts equal to alpha; fit() may overwrite it with the
        # LOOE-selected value when alpha is a list/array
        self.alpha_ = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.nu = nu
        self.length_scale = length_scale
        self.backend = backend
        # inputs are standardized (zero mean, unit variance) in fit/predict
        self.scaler = StandardScaler()

        # NOTE(review): redundant with the RuntimeError check above — both
        # guard the same condition with different exception types
        if backend == "gpu" and not JAX_AVAILABLE:
            raise ImportError(
                "JAX is not installed. Please install JAX to use the GPU backend."
            )

    def _linear_kernel(self, X, Y):
        # K = X @ Y.T, on the configured backend
        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)

    def _rbf_kernel(self, X, Y):
        # Default gamma = 1 / n_features; note this mutates self.gamma on
        # first use, so later calls (e.g. predict) reuse the same value
        if self.gamma is None:
            self.gamma = 1.0 / X.shape[1]
        if self.backend == "gpu":
            # squared Euclidean distances via the expansion ||x||^2 + ||y||^2 - 2<x,y>
            sq_dists = (
                jnp.sum(X**2, axis=1)[:, None]
                + jnp.sum(Y**2, axis=1)
                - 2 * jnp.dot(X, Y.T)
            )
            return jnp.exp(-self.gamma * sq_dists)
        else:
            sq_dists = (
                np.sum(X**2, axis=1)[:, None]
                + np.sum(Y**2, axis=1)
                - 2 * np.dot(X, Y.T)
            )
            return np.exp(-self.gamma * sq_dists)

    def _matern_kernel(self, X, Y):
        """
        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.

        Parameters:
        - X: array-like, shape (n_samples_X, n_features)
        - Y: array-like, shape (n_samples_Y, n_features)

        Returns:
        - Kernel matrix, shape (n_samples_X, n_samples_Y)
        """
        if self.backend == "gpu":
            # Compute pairwise distances
            # NOTE(review): `gammaln` and `kv` are used here without a local
            # import — assumes module-level (JAX-compatible) imports exist;
            # confirm against the file header, which is outside this view
            dists = jnp.sqrt(
                jnp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
            )
            scaled_dists = jnp.sqrt(2 * self.nu) * dists / self.length_scale

            # Matérn kernel formula:
            # K(r) = 2^(1-nu)/Gamma(nu) * (sqrt(2 nu) r / l)^nu * K_nu(sqrt(2 nu) r / l)
            coeff = (2 ** (1 - self.nu)) / jnp.exp(gammaln(self.nu))
            matern_kernel = (
                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
            )
            matern_kernel = jnp.where(
                dists == 0, 1.0, matern_kernel
            )  # Handle the case where distance is 0
            return matern_kernel
        else:
            # Use NumPy for CPU
            from scipy.special import (
                gammaln,
                kv,
            )  # Ensure scipy.special is used for CPU

            dists = np.sqrt(
                np.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2)
            )
            scaled_dists = np.sqrt(2 * self.nu) * dists / self.length_scale

            # Matérn kernel formula
            coeff = (2 ** (1 - self.nu)) / np.exp(gammaln(self.nu))
            matern_kernel = (
                coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)
            )
            matern_kernel = np.where(
                dists == 0, 1.0, matern_kernel
            )  # Handle the case where distance is 0
            return matern_kernel

    def _get_kernel(self, X, Y):
        # Dispatch on the configured kernel name
        if self.kernel == "linear":
            return self._linear_kernel(X, Y)
        elif self.kernel == "rbf":
            return self._rbf_kernel(X, Y)
        elif self.kernel == "matern":
            return self._matern_kernel(X, Y)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Training data.
        - y: array-like, shape (n_samples,)
            Target values.
        """
        # Standardize the inputs
        X = self.scaler.fit_transform(X)
        self.X_fit_ = X

        # Center the response; the mean is added back in predict()
        self.y_mean_ = np.mean(y)
        y_centered = y - self.y_mean_

        n_samples = X.shape[0]

        # Compute the kernel matrix
        K = self._get_kernel(X, X)
        self.K_ = K
        self.y_fit_ = y_centered

        if isinstance(self.alpha, (list, np.ndarray)):
            # If alpha is a list or array, compute LOOE for each alpha
            # NOTE(review): this branch always uses NumPy, even when
            # backend == "gpu" — confirm whether that is intentional
            self.alphas_ = self.alpha  # Store the list of alphas
            self.dual_coefs_ = []  # Store dual coefficients for each alpha
            self.looe_ = []  # Store LOOE for each alpha

            for alpha in self.alpha:
                G = K + alpha * np.eye(n_samples)
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                dual_coef = np.linalg.solve(G, y_centered)
                # Leave-one-out error via the ridge identity e_i = c_i / (G^-1)_ii
                looe = np.sum((dual_coef / diag_G_inv) ** 2)  # Compute LOOE
                self.dual_coefs_.append(dual_coef)
                self.looe_.append(looe)

            # Select the best alpha based on the smallest LOOE
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]
        else:
            # If alpha is a single value, proceed as usual:
            # solve (K + alpha I) c = y_centered for the dual coefficients
            if self.backend == "gpu":
                self.dual_coef_ = jnp.linalg.solve(
                    K + self.alpha * jnp.eye(n_samples), y_centered
                )
            else:
                self.dual_coef_ = np.linalg.solve(
                    K + self.alpha * np.eye(n_samples), y_centered
                )

        return self

    def predict(self, X, probs=False):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Test data.

        Returns:
        - Predicted values, shape (n_samples,).
        """
        # Standardize the inputs with the scaler fitted in fit()
        X = self.scaler.transform(X)
        K = self._get_kernel(X, self.X_fit_)
        if self.backend == "gpu":
            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # Compute similarity to self.X_fit_
                # NOTE(review): `preds` is 1-D (n_samples,), so this dot product
                # only conforms when n_samples == n_features of X_fit_ —
                # verify the intended shape for the probs path
                similarities = jnp.dot(
                    preds, self.X_fit_.T
                )  # Shape: (n_samples, n_fit_)
                # Apply softmax to get probabilities
                return jaxsoftmax(similarities, axis=1)
            return preds
        else:
            preds = np.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # Compute similarity to self.X_fit_
                similarities = np.dot(
                    preds, self.X_fit_.T
                )  # Shape: (n_samples, n_fit_)
                # Apply softmax to get probabilities
                return softmax(similarities, axis=1)
            return preds

    def partial_fit(self, X, y):
        """
        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            New training data.
        - y: array-like, shape (n_samples,)
            New target values.

        Returns:
        - self: object
            The updated model.
        """
        # Standardize the inputs: fit the scaler on the very first batch,
        # then only transform subsequent batches
        X = (
            self.scaler.fit_transform(X)
            if not hasattr(self, "X_fit_")
            else self.scaler.transform(X)
        )

        if not hasattr(self, "X_fit_"):
            # Initialize with the first batch of data
            self.X_fit_ = X

            # Center the response; NOTE(review): y_mean_ is frozen at the first
            # batch's mean and reused for all later batches
            self.y_mean_ = np.mean(y)
            y_centered = y - self.y_mean_
            self.y_fit_ = y_centered

            n_samples = X.shape[0]

            # Compute the kernel matrix for the initial data
            self.K_ = self._get_kernel(X, X)

            # Initialize dual coefficients for each alpha
            if isinstance(self.alpha, (list, np.ndarray)):
                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
            else:
                self.dual_coef_ = np.zeros(n_samples)
        else:
            # Incrementally update with new data, one point at a time
            y_centered = y - self.y_mean_  # Center the new batch of responses
            for x_new, y_new in zip(X, y_centered):
                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
                # kernel between the stored points and the new point
                k_new = self._get_kernel(self.X_fit_, x_new).flatten()

                # Compute the kernel value for the new data point
                k_self = self._get_kernel(x_new, x_new).item()

                if isinstance(self.alpha, (list, np.ndarray)):
                    # Update dual coefficients for each alpha:
                    # rank-one correction scaled by 1 / (k(x,x) + alpha)
                    for idx, alpha in enumerate(self.alpha):
                        gamma_new = 1 / (k_self + alpha)
                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
                        self.dual_coefs_[idx] = np.append(
                            self.dual_coefs_[idx], gamma_new * residual
                        )
                else:
                    # Update dual coefficients for a single alpha
                    gamma_new = 1 / (k_self + self.alpha)
                    residual = y_new - np.dot(self.dual_coef_, k_new)
                    self.dual_coef_ = np.append(
                        self.dual_coef_, gamma_new * residual
                    )

                # Update the kernel matrix: grow K_ by one row and column
                self.K_ = np.block(
                    [
                        [self.K_, k_new[:, None]],
                        [k_new[None, :], np.array([[k_self]])],
                    ]
                )

                # Update the stored data
                self.X_fit_ = np.vstack([self.X_fit_, x_new])
                self.y_fit_ = np.append(self.y_fit_, y_new)

        # Select the best alpha based on LOOE after the batch
        if isinstance(self.alpha, (list, np.ndarray)):
            self.looe_ = []
            for idx, alpha in enumerate(self.alpha):
                G = self.K_ + alpha * np.eye(self.K_.shape[0])
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
                self.looe_.append(looe)

            # Select the best alpha
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]

        return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
- alpha: float Regularization parameter.
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
144 def fit(self, X, y): 145 """ 146 Fit the Kernel Ridge Regression model. 147 148 Parameters: 149 - X: array-like, shape (n_samples, n_features) 150 Training data. 151 - y: array-like, shape (n_samples,) 152 Target values. 153 """ 154 # Standardize the inputs 155 X = self.scaler.fit_transform(X) 156 self.X_fit_ = X 157 158 # Center the response 159 self.y_mean_ = np.mean(y) 160 y_centered = y - self.y_mean_ 161 162 n_samples = X.shape[0] 163 164 # Compute the kernel matrix 165 K = self._get_kernel(X, X) 166 self.K_ = K 167 self.y_fit_ = y_centered 168 169 if isinstance(self.alpha, (list, np.ndarray)): 170 # If alpha is a list or array, compute LOOE for each alpha 171 self.alphas_ = self.alpha # Store the list of alphas 172 self.dual_coefs_ = [] # Store dual coefficients for each alpha 173 self.looe_ = [] # Store LOOE for each alpha 174 175 for alpha in self.alpha: 176 G = K + alpha * np.eye(n_samples) 177 G_inv = np.linalg.inv(G) 178 diag_G_inv = np.diag(G_inv) 179 dual_coef = np.linalg.solve(G, y_centered) 180 looe = np.sum((dual_coef / diag_G_inv) ** 2) # Compute LOOE 181 self.dual_coefs_.append(dual_coef) 182 self.looe_.append(looe) 183 184 # Select the best alpha based on the smallest LOOE 185 best_index = np.argmin(self.looe_) 186 self.alpha_ = self.alpha[best_index] 187 self.dual_coef_ = self.dual_coefs_[best_index] 188 else: 189 # If alpha is a single value, proceed as usual 190 if self.backend == "gpu": 191 self.dual_coef_ = jnp.linalg.solve( 192 K + self.alpha * jnp.eye(n_samples), y_centered 193 ) 194 else: 195 self.dual_coef_ = np.linalg.solve( 196 K + self.alpha * np.eye(n_samples), y_centered 197 ) 198 199 return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
201 def predict(self, X, probs=False): 202 """ 203 Predict using the Kernel Ridge Regression model. 204 205 Parameters: 206 - X: array-like, shape (n_samples, n_features) 207 Test data. 208 209 Returns: 210 - Predicted values, shape (n_samples,). 211 """ 212 # Standardize the inputs 213 X = self.scaler.transform(X) 214 K = self._get_kernel(X, self.X_fit_) 215 if self.backend == "gpu": 216 preds = jnp.dot(K, self.dual_coef_) + self.y_mean_ 217 if probs: 218 # Compute similarity to self.X_fit_ 219 similarities = jnp.dot( 220 preds, self.X_fit_.T 221 ) # Shape: (n_samples, n_fit_) 222 # Apply softmax to get probabilities 223 return jaxsoftmax(similarities, axis=1) 224 return preds 225 else: 226 preds = np.dot(K, self.dual_coef_) + self.y_mean_ 227 if probs: 228 # Compute similarity to self.X_fit_ 229 similarities = np.dot( 230 preds, self.X_fit_.T 231 ) # Shape: (n_samples, n_fit_) 232 # Apply softmax to get probabilities 233 return softmax(similarities, axis=1) 234 return preds
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
class LazyClassifier(LazyDeepClassifier):
    """
    Fitting -- almost -- all the classification algorithms with
    nnetsauce's CustomClassifier and returning their scores (no layers).

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
            or a custom metric identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

    All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # LazyClassifier is the single-layer (n_layers=1) special case of
        # LazyDeepClassifier; everything else is forwarded unchanged
        super().__init__(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
            n_layers=1,
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(LazyDeepRegressor):
    """
    Fitting -- almost -- all the regression algorithms with
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned
            as a dataframe.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
            or a custom metric identified by its name and provided by
            custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                               custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        """Initialize a single-layer lazy regressor.

        A LazyRegressor is a LazyDeepRegressor constrained to one layer of
        CustomRegressor (``n_layers=1``); every other argument is forwarded
        to the parent unchanged.
        """
        # Group the CustomRegressor-specific arguments; identical in effect
        # to passing each keyword to super().__init__ individually.
        custom_kwargs = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        super().__init__(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
            n_layers=1,  # single layer -> plain (non-deep) lazy regressor
            **custom_kwargs,
        )
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(Custom, ClassifierMixin):
    """

    Fitting -- almost -- all the classification algorithms with layers of
    nnetsauce's CustomClassifier and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as data frame.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
            identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' for > 90 classifiers
            (default='all')

        preprocess: bool, preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomClassifiers to be used.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline.

    Examples

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
            random_state=123)
        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)
        ```

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        # Filled by fit(): model name -> fitted pipeline/estimator.
        self.models_ = {}
        # Filled by fit(): top entry of the sorted score table.
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # fit() builds the first layer explicitly and then loops
        # range(self.n_layers) for the remaining ones, hence the -1.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit classifiers to X_train and y_train, predict and score on X_test,
        y_test.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

        Returns:

            scores: Pandas DataFrame
                Returns metrics of all the models in a Pandas DataFrame.

            predictions: Pandas DataFrame
                Returns predictions of all the models in a Pandas DataFrame.
        """
        # Metric accumulators: one entry per model that fits successfully.
        Accuracy = []
        B_Accuracy = []
        ROC_AUC = []
        F1 = []
        names = []
        TIME = []
        predictions = {}

        if self.custom_metric is not None:
            CUSTOM_METRIC = []

        # Work on DataFrames so column dtypes can drive the optional
        # preprocessing below.
        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        # Split categorical columns by cardinality so each group gets its
        # own encoder in the ColumnTransformer.
        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # baseline models: falls back to RandomForest alone if constructing
        # the XGBoost classifier raises (e.g. xgboost not usable).
        try:
            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
        except Exception as exception:
            baseline_names = ["RandomForestClassifier"]
            baseline_models = [RandomForestClassifier()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                y_pred = model.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred, normalize=True)
                b_accuracy = balanced_accuracy_score(y_test, y_pred)
                f1 = f1_score(y_test, y_pred, average="weighted")
                # ROC AUC is best-effort: it raises e.g. for multiclass
                # labels with this call signature, in which case None is
                # recorded instead.
                try:
                    roc_auc = roc_auc_score(y_test, y_pred)
                except Exception as exception:
                    roc_auc = None
                    if self.ignore_warnings is False:
                        print("ROC AUC couldn't be calculated for " + name)
                        print(exception)
                names.append(name)
                Accuracy.append(accuracy)
                B_Accuracy.append(b_accuracy)
                ROC_AUC.append(roc_auc)
                F1.append(f1)
                TIME.append(time.time() - start)
                if self.custom_metric is not None:
                    custom_metric = self.custom_metric(y_test, y_pred)
                    CUSTOM_METRIC.append(custom_metric)
                if self.verbose > 0:
                    if self.custom_metric is not None:
                        print(
                            {
                                "Model": name,
                                "Accuracy": accuracy,
                                "Balanced Accuracy": b_accuracy,
                                "ROC AUC": roc_auc,
                                "F1 Score": f1,
                                self.custom_metric.__name__: custom_metric,
                                "Time taken": time.time() - start,
                            }
                        )
                    else:
                        print(
                            {
                                "Model": name,
                                "Accuracy": accuracy,
                                "Balanced Accuracy": b_accuracy,
                                "ROC AUC": roc_auc,
                                "F1 Score": f1,
                                "Time taken": time.time() - start,
                            }
                        )
                if self.predictions:
                    predictions[name] = y_pred
            except Exception as exception:
                # A failing model is skipped, not fatal.
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        # Build the candidate list: either the precomputed registries, or
        # filter sklearn's all_estimators() by the requested names.
        if self.estimators == "all":
            self.classifiers = [
                item
                for sublist in [
                    DEEPCLASSIFIERS,
                    DEEPMULTITASKCLASSIFIERS,
                    DEEPSIMPLEMULTITASKCLASSIFIERS,
                ]
                for item in sublist
            ]
        else:
            self.classifiers = (
                [
                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
                    for est in all_estimators()
                    if (
                        issubclass(est[1], ClassifierMixin)
                        and (est[0] in self.estimators)
                    )
                ]
                + [
                    (
                        "DeepMultitaskClassifier(" + est[0] + ")",
                        partial(MultitaskClassifier, obj=est[1]()),
                    )
                    for est in all_estimators()
                    if (
                        issubclass(est[1], RegressorMixin)
                        and (est[0] in self.estimators)
                    )
                ]
                + [
                    (
                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                        partial(SimpleMultitaskClassifier, obj=est[1]()),
                    )
                    for est in all_estimators()
                    if (
                        issubclass(est[1], RegressorMixin)
                        and (est[0] in self.estimators)
                    )
                ]
            )

        if self.preprocess is True:
            for name, model in tqdm(self.classifiers):  # do parallel exec
                other_args = (
                    {}
                )  # use this trick for `random_state` too --> refactor
                # NOTE(review): other_args is populated below but never
                # passed to model(...), so n_jobs has no effect in this
                # branch — confirm whether that is intended.
                try:
                    if (
                        "n_jobs" in model().get_params().keys()
                        and name.find("LogisticRegression") == -1
                    ):
                        other_args["n_jobs"] = self.n_jobs
                except Exception:
                    pass

                start = time.time()

                try:
                    # Seed the wrapped estimator when it supports it.
                    if "random_state" in model().get_params().keys():
                        layer_clf = CustomClassifier(
                            obj=model(random_state=self.random_state),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                            cv_calibration=None,
                        )

                    else:
                        layer_clf = CustomClassifier(
                            obj=model(),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                            cv_calibration=None,
                        )

                    layer_clf.fit(X_train, y_train)

                    # Stack the remaining layers: each iteration wraps the
                    # previous (fitted) classifier as the new obj.
                    for _ in range(self.n_layers):
                        layer_clf = deepcopy(
                            CustomClassifier(
                                obj=layer_clf,
                                n_hidden_features=self.n_hidden_features,
                                activation_name=self.activation_name,
                                a=self.a,
                                nodes_sim=self.nodes_sim,
                                bias=self.bias,
                                dropout=self.dropout,
                                direct_link=self.direct_link,
                                n_clusters=self.n_clusters,
                                cluster_encode=self.cluster_encode,
                                type_clust=self.type_clust,
                                type_scaling=self.type_scaling,
                                col_sample=self.col_sample,
                                row_sample=self.row_sample,
                                seed=self.seed,
                                backend=self.backend,
                                cv_calibration=None,
                            )
                        )

                    pipe = Pipeline(
                        [
                            ("preprocessor", preprocessor),
                            ("classifier", layer_clf),
                        ]
                    )

                    pipe.fit(X_train, y_train)
                    self.models_[name] = pipe
                    y_pred = pipe.predict(X_test)
                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
                    f1 = f1_score(y_test, y_pred, average="weighted")
                    try:
                        roc_auc = roc_auc_score(y_test, y_pred)
                    except Exception as exception:
                        roc_auc = None
                        if self.ignore_warnings is False:
                            print("ROC AUC couldn't be calculated for " + name)
                            print(exception)
                    names.append(name)
                    Accuracy.append(accuracy)
                    B_Accuracy.append(b_accuracy)
                    ROC_AUC.append(roc_auc)
                    F1.append(f1)
                    TIME.append(time.time() - start)
                    if self.custom_metric is not None:
                        custom_metric = self.custom_metric(y_test, y_pred)
                        CUSTOM_METRIC.append(custom_metric)
                    if self.verbose > 0:
                        if self.custom_metric is not None:
                            print(
                                {
                                    "Model": name,
                                    "Accuracy": accuracy,
                                    "Balanced Accuracy": b_accuracy,
                                    "ROC AUC": roc_auc,
                                    "F1 Score": f1,
                                    self.custom_metric.__name__: custom_metric,
                                    "Time taken": time.time() - start,
                                }
                            )
                        else:
                            print(
                                {
                                    "Model": name,
                                    "Accuracy": accuracy,
                                    "Balanced Accuracy": b_accuracy,
                                    "ROC AUC": roc_auc,
                                    "F1 Score": f1,
                                    "Time taken": time.time() - start,
                                }
                            )
                    if self.predictions:
                        predictions[name] = y_pred
                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        else:  # no preprocessing
            for name, model in tqdm(self.classifiers):  # do parallel exec
                start = time.time()
                try:
                    # Same layer construction as the preprocessing branch,
                    # but the stacked classifier is used directly instead of
                    # inside a Pipeline.
                    if "random_state" in model().get_params().keys():
                        layer_clf = CustomClassifier(
                            obj=model(random_state=self.random_state),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                            cv_calibration=None,
                        )

                    else:
                        layer_clf = CustomClassifier(
                            obj=model(),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                            cv_calibration=None,
                        )

                    layer_clf.fit(X_train, y_train)

                    for _ in range(self.n_layers):
                        layer_clf = deepcopy(
                            CustomClassifier(
                                obj=layer_clf,
                                n_hidden_features=self.n_hidden_features,
                                activation_name=self.activation_name,
                                a=self.a,
                                nodes_sim=self.nodes_sim,
                                bias=self.bias,
                                dropout=self.dropout,
                                direct_link=self.direct_link,
                                n_clusters=self.n_clusters,
                                cluster_encode=self.cluster_encode,
                                type_clust=self.type_clust,
                                type_scaling=self.type_scaling,
                                col_sample=self.col_sample,
                                row_sample=self.row_sample,
                                seed=self.seed,
                                backend=self.backend,
                                cv_calibration=None,
                            )
                        )

                    layer_clf.fit(X_train, y_train)

                    self.models_[name] = layer_clf
                    y_pred = layer_clf.predict(X_test)
                    accuracy = accuracy_score(y_test, y_pred, normalize=True)
                    b_accuracy = balanced_accuracy_score(y_test, y_pred)
                    f1 = f1_score(y_test, y_pred, average="weighted")
                    try:
                        roc_auc = roc_auc_score(y_test, y_pred)
                    except Exception as exception:
                        roc_auc = None
                        if self.ignore_warnings is False:
                            print("ROC AUC couldn't be calculated for " + name)
                            print(exception)
                    names.append(name)
                    Accuracy.append(accuracy)
                    B_Accuracy.append(b_accuracy)
                    ROC_AUC.append(roc_auc)
                    F1.append(f1)
                    TIME.append(time.time() - start)
                    if self.custom_metric is not None:
                        custom_metric = self.custom_metric(y_test, y_pred)
                        CUSTOM_METRIC.append(custom_metric)
                    if self.verbose > 0:
                        if self.custom_metric is not None:
                            print(
                                {
                                    "Model": name,
                                    "Accuracy": accuracy,
                                    "Balanced Accuracy": b_accuracy,
                                    "ROC AUC": roc_auc,
                                    "F1 Score": f1,
                                    self.custom_metric.__name__: custom_metric,
                                    "Time taken": time.time() - start,
                                }
                            )
                        else:
                            print(
                                {
                                    "Model": name,
                                    "Accuracy": accuracy,
                                    "Balanced Accuracy": b_accuracy,
                                    "ROC AUC": roc_auc,
                                    "F1 Score": f1,
                                    "Time taken": time.time() - start,
                                }
                            )
                    if self.predictions:
                        predictions[name] = y_pred

                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        # Assemble the score table; the custom-metric column is only added
        # when a custom metric was supplied.
        if self.custom_metric is None:
            scores = pd.DataFrame(
                {
                    "Model": names,
                    "Accuracy": Accuracy,
                    "Balanced Accuracy": B_Accuracy,
                    "ROC AUC": ROC_AUC,
                    "F1 Score": F1,
                    "Time Taken": TIME,
                }
            )
        else:
            scores = pd.DataFrame(
                {
                    "Model": names,
                    "Accuracy": Accuracy,
                    "Balanced Accuracy": B_Accuracy,
                    "ROC AUC": ROC_AUC,
                    "F1 Score": F1,
                    "Custom metric": CUSTOM_METRIC,
                    "Time Taken": TIME,
                }
            )
        scores = scores.sort_values(by=self.sort_by, ascending=False).set_index(
            "Model"
        )

        # Best model = first row after sorting (descending) by sort_by.
        self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric.

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """Returns all the model objects trained. If fit hasn't been called yet,
        then it's called to return the models.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

        Returns:

            models: dict-object,
                Returns a dictionary with each model's pipeline as value
                and key = name of the model.
        """
        # Lazily fit on first access.
        if len(self.models_.keys()) == 0:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
229 def fit(self, X_train, X_test, y_train, y_test): 230 """Fit classifiers to X_train and y_train, predict and score on X_test, 231 y_test. 232 233 Parameters: 234 235 X_train: array-like, 236 Training vectors, where rows is the number of samples 237 and columns is the number of features. 238 239 X_test: array-like, 240 Testing vectors, where rows is the number of samples 241 and columns is the number of features. 242 243 y_train: array-like, 244 Training vectors, where rows is the number of samples 245 and columns is the number of features. 246 247 y_test: array-like, 248 Testing vectors, where rows is the number of samples 249 and columns is the number of features. 250 251 Returns: 252 253 scores: Pandas DataFrame 254 Returns metrics of all the models in a Pandas DataFrame. 255 256 predictions: Pandas DataFrame 257 Returns predictions of all the models in a Pandas DataFrame. 258 """ 259 Accuracy = [] 260 B_Accuracy = [] 261 ROC_AUC = [] 262 F1 = [] 263 names = [] 264 TIME = [] 265 predictions = {} 266 267 if self.custom_metric is not None: 268 CUSTOM_METRIC = [] 269 270 if isinstance(X_train, np.ndarray): 271 X_train = pd.DataFrame(X_train) 272 X_test = pd.DataFrame(X_test) 273 274 numeric_features = X_train.select_dtypes(include=[np.number]).columns 275 categorical_features = X_train.select_dtypes(include=["object"]).columns 276 277 categorical_low, categorical_high = get_card_split( 278 X_train, categorical_features 279 ) 280 281 if self.preprocess is True: 282 preprocessor = ColumnTransformer( 283 transformers=[ 284 ("numeric", numeric_transformer, numeric_features), 285 ( 286 "categorical_low", 287 categorical_transformer_low, 288 categorical_low, 289 ), 290 ( 291 "categorical_high", 292 categorical_transformer_high, 293 categorical_high, 294 ), 295 ] 296 ) 297 298 # baseline models 299 try: 300 baseline_names = ["RandomForestClassifier", "XGBClassifier"] 301 baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()] 302 except Exception as exception: 
303 baseline_names = ["RandomForestClassifier"] 304 baseline_models = [RandomForestClassifier()] 305 306 for name, model in zip(baseline_names, baseline_models): 307 start = time.time() 308 try: 309 model.fit(X_train, y_train) 310 self.models_[name] = model 311 y_pred = model.predict(X_test) 312 accuracy = accuracy_score(y_test, y_pred, normalize=True) 313 b_accuracy = balanced_accuracy_score(y_test, y_pred) 314 f1 = f1_score(y_test, y_pred, average="weighted") 315 try: 316 roc_auc = roc_auc_score(y_test, y_pred) 317 except Exception as exception: 318 roc_auc = None 319 if self.ignore_warnings is False: 320 print("ROC AUC couldn't be calculated for " + name) 321 print(exception) 322 names.append(name) 323 Accuracy.append(accuracy) 324 B_Accuracy.append(b_accuracy) 325 ROC_AUC.append(roc_auc) 326 F1.append(f1) 327 TIME.append(time.time() - start) 328 if self.custom_metric is not None: 329 custom_metric = self.custom_metric(y_test, y_pred) 330 CUSTOM_METRIC.append(custom_metric) 331 if self.verbose > 0: 332 if self.custom_metric is not None: 333 print( 334 { 335 "Model": name, 336 "Accuracy": accuracy, 337 "Balanced Accuracy": b_accuracy, 338 "ROC AUC": roc_auc, 339 "F1 Score": f1, 340 self.custom_metric.__name__: custom_metric, 341 "Time taken": time.time() - start, 342 } 343 ) 344 else: 345 print( 346 { 347 "Model": name, 348 "Accuracy": accuracy, 349 "Balanced Accuracy": b_accuracy, 350 "ROC AUC": roc_auc, 351 "F1 Score": f1, 352 "Time taken": time.time() - start, 353 } 354 ) 355 if self.predictions: 356 predictions[name] = y_pred 357 except Exception as exception: 358 if self.ignore_warnings is False: 359 print(name + " model failed to execute") 360 print(exception) 361 362 if self.estimators == "all": 363 self.classifiers = [ 364 item 365 for sublist in [ 366 DEEPCLASSIFIERS, 367 DEEPMULTITASKCLASSIFIERS, 368 DEEPSIMPLEMULTITASKCLASSIFIERS, 369 ] 370 for item in sublist 371 ] 372 else: 373 self.classifiers = ( 374 [ 375 ("DeepCustomClassifier(" + est[0] + ")", 
est[1]) 376 for est in all_estimators() 377 if ( 378 issubclass(est[1], ClassifierMixin) 379 and (est[0] in self.estimators) 380 ) 381 ] 382 + [ 383 ( 384 "DeepMultitaskClassifier(" + est[0] + ")", 385 partial(MultitaskClassifier, obj=est[1]()), 386 ) 387 for est in all_estimators() 388 if ( 389 issubclass(est[1], RegressorMixin) 390 and (est[0] in self.estimators) 391 ) 392 ] 393 + [ 394 ( 395 "DeepSimpleMultitaskClassifier(" + est[0] + ")", 396 partial(SimpleMultitaskClassifier, obj=est[1]()), 397 ) 398 for est in all_estimators() 399 if ( 400 issubclass(est[1], RegressorMixin) 401 and (est[0] in self.estimators) 402 ) 403 ] 404 ) 405 406 if self.preprocess is True: 407 for name, model in tqdm(self.classifiers): # do parallel exec 408 other_args = ( 409 {} 410 ) # use this trick for `random_state` too --> refactor 411 try: 412 if ( 413 "n_jobs" in model().get_params().keys() 414 and name.find("LogisticRegression") == -1 415 ): 416 other_args["n_jobs"] = self.n_jobs 417 except Exception: 418 pass 419 420 start = time.time() 421 422 try: 423 if "random_state" in model().get_params().keys(): 424 layer_clf = CustomClassifier( 425 obj=model(random_state=self.random_state), 426 n_hidden_features=self.n_hidden_features, 427 activation_name=self.activation_name, 428 a=self.a, 429 nodes_sim=self.nodes_sim, 430 bias=self.bias, 431 dropout=self.dropout, 432 direct_link=self.direct_link, 433 n_clusters=self.n_clusters, 434 cluster_encode=self.cluster_encode, 435 type_clust=self.type_clust, 436 type_scaling=self.type_scaling, 437 col_sample=self.col_sample, 438 row_sample=self.row_sample, 439 seed=self.seed, 440 backend=self.backend, 441 cv_calibration=None, 442 ) 443 444 else: 445 layer_clf = CustomClassifier( 446 obj=model(), 447 n_hidden_features=self.n_hidden_features, 448 activation_name=self.activation_name, 449 a=self.a, 450 nodes_sim=self.nodes_sim, 451 bias=self.bias, 452 dropout=self.dropout, 453 direct_link=self.direct_link, 454 n_clusters=self.n_clusters, 455 
cluster_encode=self.cluster_encode, 456 type_clust=self.type_clust, 457 type_scaling=self.type_scaling, 458 col_sample=self.col_sample, 459 row_sample=self.row_sample, 460 seed=self.seed, 461 backend=self.backend, 462 cv_calibration=None, 463 ) 464 465 layer_clf.fit(X_train, y_train) 466 467 for _ in range(self.n_layers): 468 layer_clf = deepcopy( 469 CustomClassifier( 470 obj=layer_clf, 471 n_hidden_features=self.n_hidden_features, 472 activation_name=self.activation_name, 473 a=self.a, 474 nodes_sim=self.nodes_sim, 475 bias=self.bias, 476 dropout=self.dropout, 477 direct_link=self.direct_link, 478 n_clusters=self.n_clusters, 479 cluster_encode=self.cluster_encode, 480 type_clust=self.type_clust, 481 type_scaling=self.type_scaling, 482 col_sample=self.col_sample, 483 row_sample=self.row_sample, 484 seed=self.seed, 485 backend=self.backend, 486 cv_calibration=None, 487 ) 488 ) 489 490 pipe = Pipeline( 491 [ 492 ("preprocessor", preprocessor), 493 ("classifier", layer_clf), 494 ] 495 ) 496 497 pipe.fit(X_train, y_train) 498 self.models_[name] = pipe 499 y_pred = pipe.predict(X_test) 500 accuracy = accuracy_score(y_test, y_pred, normalize=True) 501 b_accuracy = balanced_accuracy_score(y_test, y_pred) 502 f1 = f1_score(y_test, y_pred, average="weighted") 503 try: 504 roc_auc = roc_auc_score(y_test, y_pred) 505 except Exception as exception: 506 roc_auc = None 507 if self.ignore_warnings is False: 508 print("ROC AUC couldn't be calculated for " + name) 509 print(exception) 510 names.append(name) 511 Accuracy.append(accuracy) 512 B_Accuracy.append(b_accuracy) 513 ROC_AUC.append(roc_auc) 514 F1.append(f1) 515 TIME.append(time.time() - start) 516 if self.custom_metric is not None: 517 custom_metric = self.custom_metric(y_test, y_pred) 518 CUSTOM_METRIC.append(custom_metric) 519 if self.verbose > 0: 520 if self.custom_metric is not None: 521 print( 522 { 523 "Model": name, 524 "Accuracy": accuracy, 525 "Balanced Accuracy": b_accuracy, 526 "ROC AUC": roc_auc, 527 "F1 
Score": f1, 528 self.custom_metric.__name__: custom_metric, 529 "Time taken": time.time() - start, 530 } 531 ) 532 else: 533 print( 534 { 535 "Model": name, 536 "Accuracy": accuracy, 537 "Balanced Accuracy": b_accuracy, 538 "ROC AUC": roc_auc, 539 "F1 Score": f1, 540 "Time taken": time.time() - start, 541 } 542 ) 543 if self.predictions: 544 predictions[name] = y_pred 545 except Exception as exception: 546 if self.ignore_warnings is False: 547 print(name + " model failed to execute") 548 print(exception) 549 550 else: # no preprocessing 551 for name, model in tqdm(self.classifiers): # do parallel exec 552 start = time.time() 553 try: 554 if "random_state" in model().get_params().keys(): 555 layer_clf = CustomClassifier( 556 obj=model(random_state=self.random_state), 557 n_hidden_features=self.n_hidden_features, 558 activation_name=self.activation_name, 559 a=self.a, 560 nodes_sim=self.nodes_sim, 561 bias=self.bias, 562 dropout=self.dropout, 563 direct_link=self.direct_link, 564 n_clusters=self.n_clusters, 565 cluster_encode=self.cluster_encode, 566 type_clust=self.type_clust, 567 type_scaling=self.type_scaling, 568 col_sample=self.col_sample, 569 row_sample=self.row_sample, 570 seed=self.seed, 571 backend=self.backend, 572 cv_calibration=None, 573 ) 574 575 else: 576 layer_clf = CustomClassifier( 577 obj=model(), 578 n_hidden_features=self.n_hidden_features, 579 activation_name=self.activation_name, 580 a=self.a, 581 nodes_sim=self.nodes_sim, 582 bias=self.bias, 583 dropout=self.dropout, 584 direct_link=self.direct_link, 585 n_clusters=self.n_clusters, 586 cluster_encode=self.cluster_encode, 587 type_clust=self.type_clust, 588 type_scaling=self.type_scaling, 589 col_sample=self.col_sample, 590 row_sample=self.row_sample, 591 seed=self.seed, 592 backend=self.backend, 593 cv_calibration=None, 594 ) 595 596 layer_clf.fit(X_train, y_train) 597 598 for _ in range(self.n_layers): 599 layer_clf = deepcopy( 600 CustomClassifier( 601 obj=layer_clf, 602 
n_hidden_features=self.n_hidden_features, 603 activation_name=self.activation_name, 604 a=self.a, 605 nodes_sim=self.nodes_sim, 606 bias=self.bias, 607 dropout=self.dropout, 608 direct_link=self.direct_link, 609 n_clusters=self.n_clusters, 610 cluster_encode=self.cluster_encode, 611 type_clust=self.type_clust, 612 type_scaling=self.type_scaling, 613 col_sample=self.col_sample, 614 row_sample=self.row_sample, 615 seed=self.seed, 616 backend=self.backend, 617 cv_calibration=None, 618 ) 619 ) 620 621 # layer_clf.fit(X_train, y_train) 622 623 layer_clf.fit(X_train, y_train) 624 625 self.models_[name] = layer_clf 626 y_pred = layer_clf.predict(X_test) 627 accuracy = accuracy_score(y_test, y_pred, normalize=True) 628 b_accuracy = balanced_accuracy_score(y_test, y_pred) 629 f1 = f1_score(y_test, y_pred, average="weighted") 630 try: 631 roc_auc = roc_auc_score(y_test, y_pred) 632 except Exception as exception: 633 roc_auc = None 634 if self.ignore_warnings is False: 635 print("ROC AUC couldn't be calculated for " + name) 636 print(exception) 637 names.append(name) 638 Accuracy.append(accuracy) 639 B_Accuracy.append(b_accuracy) 640 ROC_AUC.append(roc_auc) 641 F1.append(f1) 642 TIME.append(time.time() - start) 643 if self.custom_metric is not None: 644 custom_metric = self.custom_metric(y_test, y_pred) 645 CUSTOM_METRIC.append(custom_metric) 646 if self.verbose > 0: 647 if self.custom_metric is not None: 648 print( 649 { 650 "Model": name, 651 "Accuracy": accuracy, 652 "Balanced Accuracy": b_accuracy, 653 "ROC AUC": roc_auc, 654 "F1 Score": f1, 655 self.custom_metric.__name__: custom_metric, 656 "Time taken": time.time() - start, 657 } 658 ) 659 else: 660 print( 661 { 662 "Model": name, 663 "Accuracy": accuracy, 664 "Balanced Accuracy": b_accuracy, 665 "ROC AUC": roc_auc, 666 "F1 Score": f1, 667 "Time taken": time.time() - start, 668 } 669 ) 670 if self.predictions: 671 predictions[name] = y_pred 672 except Exception as exception: 673 if self.ignore_warnings is False: 674 
print(name + " model failed to execute") 675 print(exception) 676 677 if self.custom_metric is None: 678 scores = pd.DataFrame( 679 { 680 "Model": names, 681 "Accuracy": Accuracy, 682 "Balanced Accuracy": B_Accuracy, 683 "ROC AUC": ROC_AUC, 684 "F1 Score": F1, 685 "Time Taken": TIME, 686 } 687 ) 688 else: 689 scores = pd.DataFrame( 690 { 691 "Model": names, 692 "Accuracy": Accuracy, 693 "Balanced Accuracy": B_Accuracy, 694 "ROC AUC": ROC_AUC, 695 "F1 Score": F1, 696 "Custom metric": CUSTOM_METRIC, 697 "Time Taken": TIME, 698 } 699 ) 700 scores = scores.sort_values(by=self.sort_by, ascending=False).set_index( 701 "Model" 702 ) 703 704 self.best_model_ = self.models_[scores.index[0]] 705 706 if self.predictions is True: 707 return scores, predictions 708 709 return scores
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training target values (class labels), one entry per
training sample.
y_test: array-like,
Testing target values (class labels), one entry per
testing sample.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
723 def provide_models(self, X_train, X_test, y_train, y_test): 724 """Returns all the model objects trained. If fit hasn't been called yet, 725 then it's called to return the models. 726 727 Parameters: 728 729 X_train: array-like, 730 Training vectors, where rows is the number of samples 731 and columns is the number of features. 732 733 X_test: array-like, 734 Testing vectors, where rows is the number of samples 735 and columns is the number of features. 736 737 y_train: array-like, 738 Training vectors, where rows is the number of samples 739 and columns is the number of features. 740 741 y_test: array-like, 742 Testing vectors, where rows is the number of samples 743 and columns is the number of features. 744 745 Returns: 746 747 models: dict-object, 748 Returns a dictionary with each model's pipeline as value 749 and key = name of the model. 750 """ 751 if len(self.models_.keys()) == 0: 752 self.fit(X_train, X_test, y_train, y_test) 753 754 return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
y_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
class LazyDeepRegressor(Custom, RegressorMixin):
    """
    Fitting -- almost -- all the regression algorithms with layers of
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are not able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned as a data frame.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken'
            or a custom metric identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomRegressors to be used.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # the base CustomRegressor counts as the first layer, so only
        # n_layers - 1 additional wrappers are stacked in fit()
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values.

            y_test: array-like,
                Testing target values.

        Returns:

            scores: Pandas DataFrame
                Returns metrics of all the models in a Pandas DataFrame.

            predictions: dict (only when ``self.predictions`` is True)
                Predictions of all the models, keyed by model name.
        """
        R2 = []
        ADJR2 = []
        RMSE = []
        names = []
        TIME = []
        predictions = {}

        if self.custom_metric:
            CUSTOM_METRIC = []

        def _record(name, estimator, start):
            # Predict on the held-out set, compute every metric and append it
            # to the running columns; shared by all three fitting loops below.
            y_pred = estimator.predict(X_test)
            r_squared = r2_score(y_test, y_pred)
            adj_rsquared = adjusted_rsquared(
                r_squared, X_test.shape[0], X_test.shape[1]
            )
            rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

            names.append(name)
            R2.append(r_squared)
            ADJR2.append(adj_rsquared)
            RMSE.append(rmse)
            TIME.append(time.time() - start)

            if self.custom_metric:
                custom_metric = self.custom_metric(y_test, y_pred)
                CUSTOM_METRIC.append(custom_metric)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "R-Squared": r_squared,
                    "Adjusted R-Squared": adj_rsquared,
                    "RMSE": rmse,
                    "Time taken": time.time() - start,
                }
                if self.custom_metric:
                    scores_verbose[self.custom_metric.__name__] = custom_metric
                print(scores_verbose)

            if self.predictions:
                predictions[name] = y_pred

        # Keyword arguments shared by every CustomRegressor layer.
        layer_kwargs = dict(
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

        def _stacked_regressor(model, fit_base):
            # Wrap the base estimator in a CustomRegressor (seeding it when
            # the estimator exposes `random_state`), optionally fit it, then
            # wrap it `self.n_layers` more times to build the deep stack.
            if "random_state" in model().get_params().keys():
                reg = CustomRegressor(
                    obj=model(random_state=self.random_state), **layer_kwargs
                )
            else:
                reg = CustomRegressor(obj=model(), **layer_kwargs)
            if fit_base:
                reg.fit(X_train, y_train)
            for _ in range(self.n_layers):
                reg = deepcopy(CustomRegressor(obj=reg, **layer_kwargs))
            return reg

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # base models (XGBoost is optional; fall back to the forest only)
        try:
            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
        except Exception as exception:
            baseline_names = ["RandomForestRegressor"]
            baseline_models = [RandomForestRegressor()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                _record(name, model, start)
            except Exception as exception:
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        if self.estimators == "all":
            self.regressors = DEEPREGRESSORS
        else:
            self.regressors = [
                ("DeepCustomRegressor(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]

        if self.preprocess is True:
            for name, model in tqdm(self.regressors):  # do parallel exec
                start = time.time()
                try:
                    # NOTE: unlike the no-preprocess branch, the base layer is
                    # not fitted before stacking here; the stack is fitted
                    # once below and then refitted inside the pipeline.
                    layer_regr = _stacked_regressor(model, fit_base=False)
                    layer_regr.fit(X_train, y_train)

                    pipe = Pipeline(
                        steps=[
                            ("preprocessor", preprocessor),
                            ("regressor", layer_regr),
                        ]
                    )
                    pipe.fit(X_train, y_train)

                    self.models_[name] = pipe
                    _record(name, pipe, start)
                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        else:  # no preprocessing
            for name, model in tqdm(self.regressors):  # do parallel exec
                start = time.time()
                try:
                    layer_regr = _stacked_regressor(model, fit_base=True)
                    layer_regr.fit(X_train, y_train)

                    self.models_[name] = layer_regr
                    _record(name, layer_regr, start)
                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        scores = {
            "Model": names,
            "Adjusted R-Squared": ADJR2,
            "R-Squared": R2,
            "RMSE": RMSE,
            "Time Taken": TIME,
        }

        if self.custom_metric:
            scores["Custom metric"] = CUSTOM_METRIC

        scores = pd.DataFrame(scores)
        # lower is better for RMSE (the default sort key), hence ascending
        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
            "Model"
        )

        self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric.

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """
        This function returns all the model objects trained in fit function.
        If fit is not called already, then we call fit and then return the models.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values.

            y_test: array-like,
                Testing target values.

        Returns:

            models: dict-object,
                Returns a dictionary with each model pipeline as value
                with key as name of models.

        """
        if len(self.models_.keys()) == 0:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a data frame.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric'.
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
    def fit(self, X_train, X_test, y_train, y_test):
        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

        Returns:
        -------
        scores: Pandas DataFrame
            Returns metrics of all the models in a Pandas DataFrame.

        predictions : Pandas DataFrame
            Returns predictions of all the models in a Pandas DataFrame.

        """
        # Per-model metric accumulators; all appended in lockstep with `names`.
        R2 = []
        ADJR2 = []
        RMSE = []
        # WIN = []
        names = []
        TIME = []
        predictions = {}

        # CUSTOM_METRIC only exists when a custom metric was supplied; every
        # later append is guarded by the same `self.custom_metric` check.
        if self.custom_metric:
            CUSTOM_METRIC = []

        # Normalize ndarray inputs to DataFrames so dtype-based column
        # selection below works uniformly.
        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        # Split categorical columns by cardinality so low/high-cardinality
        # columns can get different encoders.
        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # base models
        # If xgboost is unavailable (e.g. `xgb` unresolved), fall back to
        # RandomForest alone.
        try:
            baseline_names = ["RandomForestRegressor", "XGBRegressor"]
            baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
        except Exception as exception:
            baseline_names = ["RandomForestRegressor"]
            baseline_models = [RandomForestRegressor()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self.models_[name] = model
                y_pred = model.predict(X_test)
                r_squared = r2_score(y_test, y_pred)
                adj_rsquared = adjusted_rsquared(
                    r_squared, X_test.shape[0], X_test.shape[1]
                )
                rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

                names.append(name)
                R2.append(r_squared)
                ADJR2.append(adj_rsquared)
                RMSE.append(rmse)
                TIME.append(time.time() - start)

                if self.custom_metric:
                    custom_metric = self.custom_metric(y_test, y_pred)
                    CUSTOM_METRIC.append(custom_metric)

                if self.verbose > 0:
                    scores_verbose = {
                        "Model": name,
                        "R-Squared": r_squared,
                        "Adjusted R-Squared": adj_rsquared,
                        "RMSE": rmse,
                        "Time taken": time.time() - start,
                    }

                    if self.custom_metric:
                        scores_verbose[self.custom_metric.__name__] = (
                            custom_metric
                        )

                    print(scores_verbose)
                if self.predictions:
                    predictions[name] = y_pred
            except Exception as exception:
                # Best-effort benchmarking: a failing model is skipped,
                # with optional reporting.
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        # Candidate estimators: either the curated module-level list, or a
        # user-specified subset filtered from sklearn's estimator registry.
        if self.estimators == "all":
            self.regressors = DEEPREGRESSORS
        else:
            self.regressors = [
                ("DeepCustomRegressor(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (
                    issubclass(est[1], RegressorMixin)
                    and (est[0] in self.estimators)
                )
            ]

        if self.preprocess is True:
            for name, model in tqdm(self.regressors):  # do parallel exec
                start = time.time()
                try:
                    # Seed the base estimator when it accepts `random_state`.
                    if "random_state" in model().get_params().keys():
                        layer_regr = CustomRegressor(
                            obj=model(random_state=self.random_state),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                        )
                    else:
                        layer_regr = CustomRegressor(
                            obj=model(),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                        )

                    # Stack n_layers CustomRegressors, each wrapping the
                    # previous one as its base learner.
                    for _ in range(self.n_layers):
                        layer_regr = deepcopy(
                            CustomRegressor(
                                obj=layer_regr,
                                n_hidden_features=self.n_hidden_features,
                                activation_name=self.activation_name,
                                a=self.a,
                                nodes_sim=self.nodes_sim,
                                bias=self.bias,
                                dropout=self.dropout,
                                direct_link=self.direct_link,
                                n_clusters=self.n_clusters,
                                cluster_encode=self.cluster_encode,
                                type_clust=self.type_clust,
                                type_scaling=self.type_scaling,
                                col_sample=self.col_sample,
                                row_sample=self.row_sample,
                                seed=self.seed,
                                backend=self.backend,
                            )
                        )

                    # NOTE(review): this fit on raw X_train looks redundant —
                    # pipe.fit below refits the regressor inside the pipeline;
                    # confirm before removing.
                    layer_regr.fit(X_train, y_train)

                    pipe = Pipeline(
                        steps=[
                            ("preprocessor", preprocessor),
                            ("regressor", layer_regr),
                        ]
                    )

                    pipe.fit(X_train, y_train)

                    self.models_[name] = pipe
                    y_pred = pipe.predict(X_test)
                    r_squared = r2_score(y_test, y_pred)
                    adj_rsquared = adjusted_rsquared(
                        r_squared, X_test.shape[0], X_test.shape[1]
                    )
                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

                    names.append(name)
                    R2.append(r_squared)
                    ADJR2.append(adj_rsquared)
                    RMSE.append(rmse)
                    TIME.append(time.time() - start)

                    if self.custom_metric:
                        custom_metric = self.custom_metric(y_test, y_pred)
                        CUSTOM_METRIC.append(custom_metric)

                    if self.verbose > 0:
                        scores_verbose = {
                            "Model": name,
                            "R-Squared": r_squared,
                            "Adjusted R-Squared": adj_rsquared,
                            "RMSE": rmse,
                            "Time taken": time.time() - start,
                        }

                        if self.custom_metric:
                            scores_verbose[self.custom_metric.__name__] = (
                                custom_metric
                            )

                        print(scores_verbose)
                    if self.predictions:
                        predictions[name] = y_pred
                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        else:  # no preprocessing
            for name, model in tqdm(self.regressors):  # do parallel exec
                start = time.time()
                try:
                    if "random_state" in model().get_params().keys():
                        layer_regr = CustomRegressor(
                            obj=model(random_state=self.random_state),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                        )
                    else:
                        layer_regr = CustomRegressor(
                            obj=model(),
                            n_hidden_features=self.n_hidden_features,
                            activation_name=self.activation_name,
                            a=self.a,
                            nodes_sim=self.nodes_sim,
                            bias=self.bias,
                            dropout=self.dropout,
                            direct_link=self.direct_link,
                            n_clusters=self.n_clusters,
                            cluster_encode=self.cluster_encode,
                            type_clust=self.type_clust,
                            type_scaling=self.type_scaling,
                            col_sample=self.col_sample,
                            row_sample=self.row_sample,
                            seed=self.seed,
                            backend=self.backend,
                        )

                    # NOTE(review): fitting here AND again after the stacking
                    # loop appears redundant (the deepcopied wrappers embed a
                    # fitted base); kept as-is to preserve behavior.
                    layer_regr.fit(X_train, y_train)

                    for _ in range(self.n_layers):
                        layer_regr = deepcopy(
                            CustomRegressor(
                                obj=layer_regr,
                                n_hidden_features=self.n_hidden_features,
                                activation_name=self.activation_name,
                                a=self.a,
                                nodes_sim=self.nodes_sim,
                                bias=self.bias,
                                dropout=self.dropout,
                                direct_link=self.direct_link,
                                n_clusters=self.n_clusters,
                                cluster_encode=self.cluster_encode,
                                type_clust=self.type_clust,
                                type_scaling=self.type_scaling,
                                col_sample=self.col_sample,
                                row_sample=self.row_sample,
                                seed=self.seed,
                                backend=self.backend,
                            )
                        )

                    # layer_regr.fit(X_train, y_train)

                    layer_regr.fit(X_train, y_train)

                    self.models_[name] = layer_regr
                    y_pred = layer_regr.predict(X_test)

                    r_squared = r2_score(y_test, y_pred)
                    adj_rsquared = adjusted_rsquared(
                        r_squared, X_test.shape[0], X_test.shape[1]
                    )
                    rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

                    names.append(name)
                    R2.append(r_squared)
                    ADJR2.append(adj_rsquared)
                    RMSE.append(rmse)
                    TIME.append(time.time() - start)

                    if self.custom_metric:
                        custom_metric = self.custom_metric(y_test, y_pred)
                        CUSTOM_METRIC.append(custom_metric)

                    if self.verbose > 0:
                        scores_verbose = {
                            "Model": name,
                            "R-Squared": r_squared,
                            "Adjusted R-Squared": adj_rsquared,
                            "RMSE": rmse,
                            "Time taken": time.time() - start,
                        }

                        if self.custom_metric:
                            scores_verbose[self.custom_metric.__name__] = (
                                custom_metric
                            )

                        print(scores_verbose)
                    if self.predictions:
                        predictions[name] = y_pred
                except Exception as exception:
                    if self.ignore_warnings is False:
                        print(name + " model failed to execute")
                        print(exception)

        scores = {
            "Model": names,
            "Adjusted R-Squared": ADJR2,
            "R-Squared": R2,
            "RMSE": RMSE,
            "Time Taken": TIME,
        }

        if self.custom_metric:
            scores["Custom metric"] = CUSTOM_METRIC

        scores = pd.DataFrame(scores)
        # Ascending sort: lower is better for the default sort_by ('RMSE');
        # the best model is the first row after sorting.
        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
            "Model"
        )

        self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : Pandas DataFrame Returns predictions of all the models in a Pandas DataFrame.
618 def provide_models(self, X_train, X_test, y_train, y_test): 619 """ 620 This function returns all the model objects trained in fit function. 621 If fit is not called already, then we call fit and then return the models. 622 623 Parameters: 624 625 X_train : array-like, 626 Training vectors, where rows is the number of samples 627 and columns is the number of features. 628 629 X_test : array-like, 630 Testing vectors, where rows is the number of samples 631 and columns is the number of features. 632 633 y_train : array-like, 634 Training vectors, where rows is the number of samples 635 and columns is the number of features. 636 637 y_test : array-like, 638 Testing vectors, where rows is the number of samples 639 and columns is the number of features. 640 641 Returns: 642 643 models: dict-object, 644 Returns a dictionary with each model pipeline as value 645 with key as name of models. 646 647 """ 648 if len(self.models_.keys()) == 0: 649 self.fit(X_train, X_test, y_train, y_test) 650 651 return self.models_
This function returns all the model objects trained by the fit function. If fit has not been called yet, fit is called first and the models are then returned.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class LazyMTS(LazyDeepMTS):
    """
    Fitting -- almost -- all the regression algorithms to multivariate time
    series and returning their scores (no layers).

    This is LazyDeepMTS restricted to a single layer (n_layers=1).

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warning related to algorithms that are not
            able to run is ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned
            as a dataframe.

        sort_by: string, optional (default=None)
            Sort models by a metric. Available options are 'RMSE', 'MAE',
            'MPL', 'MPE', 'MAPE', 'R-Squared', 'Adjusted R-Squared' or a
            custom metric identified by its name and provided by
            custom_metric. When None, the parent class picks 'WINKLERSCORE'
            (prediction-interval settings) or 'RMSE'.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            List of estimator (regression algorithm) names, or just 'all'.

        preprocess: bool
            Preprocessing is done when set to True.

        h: int, optional (default=None)
            Number of steps ahead to predict (when used, must be > 0 and
            < X_test.shape[0]).

        All the other parameters are the same as MTS's.

    Attributes:

        models_: dict-object
            Dictionary with the name of each model as key and the fitted
            model pipeline as value.

        best_model_: object
            Best model pipeline, according to the sort_by metric.

    Examples:

        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by=None,  # leave it as is
        random_state=42,
        estimators="all",
        preprocess=False,
        h=None,
        # MTS attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=15,
        type_pi="scp2-kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        show_progress=False,
    ):
        # Gather every argument and delegate to LazyDeepMTS with the layer
        # count pinned to 1 (that is the whole difference between the two).
        forwarded = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            h=h,
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            seed=seed,
            backend=backend,
            show_progress=show_progress,
        )
        super().__init__(n_layers=1, **forwarded)
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warning related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, 
xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = 
self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 
direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 
scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for 
name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 
685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 
scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 
860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 
                pass  # per_series scores are a dict of frames and can't be sorted

        # Return predictions alongside scores only when requested at init.
        if self.predictions is True:
            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric.

        """
        # best_model_ is set at the end of fit() from the sorted scores table
        return self.best_model_

    def provide_models(self, X_train, X_test):
        """
        This function returns all the model objects trained in fit function.
        If fit is not called already, then we call fit and then return the models.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

        Returns:

            models: dict-object,
                Returns a dictionary with each model pipeline as value
                with key as name of models.

        """
        if self.h is None:
            # fit on the full test span when no horizon is set
            if len(self.models_.keys()) == 0:
                self.fit(X_train, X_test)
        else:
            # truncate the test set to the forecast horizon before fitting
            if len(self.models_.keys()) == 0:
                if isinstance(X_test, pd.DataFrame):
                    self.fit(X_train, X_test.iloc[0: self.h, :])
                else:
                    self.fit(X_train, X_test[0: self.h, :])

        return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to a MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
241 def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0: self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0: self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 continue 365 366 names.append(name) 367 RMSE.append(rmse) 368 MAE.append(mae) 369 MPL.append(mpl) 370 371 if self.custom_metric is not None: 372 try: 373 if self.h is None: 374 custom_metric = 
self.custom_metric(X_test, X_pred) 375 else: 376 custom_metric = self.custom_metric(X_test_h, X_pred) 377 CUSTOM_METRIC.append(custom_metric) 378 except Exception as e: 379 custom_metric = np.iinfo(np.float32).max 380 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 381 382 if (self.replications is not None) or (self.type_pi == "gaussian"): 383 if per_series == False: 384 winklerscore = winkler_score( 385 obj=X_pred, actual=X_test, level=95 386 ) 387 coveragecalc = coverage(X_pred, X_test, level=95) 388 else: 389 winklerscore = winkler_score( 390 obj=X_pred, actual=X_test, level=95, per_series=True 391 ) 392 coveragecalc = coverage( 393 X_pred, X_test, level=95, per_series=True 394 ) 395 WINKLERSCORE.append(winklerscore) 396 COVERAGE.append(coveragecalc) 397 TIME.append(time.time() - start) 398 399 if self.estimators == "all": 400 if self.n_layers <= 1: 401 self.regressors = REGRESSORSMTS 402 else: 403 self.regressors = DEEPREGRESSORSMTS 404 else: 405 if self.n_layers <= 1: 406 self.regressors = [ 407 ("MTS(" + est[0] + ")", est[1]) 408 for est in all_estimators() 409 if ( 410 issubclass(est[1], RegressorMixin) 411 and (est[0] in self.estimators) 412 ) 413 ] 414 else: # self.n_layers > 1 415 self.regressors = [ 416 ("DeepMTS(" + est[0] + ")", est[1]) 417 for est in all_estimators() 418 if ( 419 issubclass(est[1], RegressorMixin) 420 and (est[0] in self.estimators) 421 ) 422 ] 423 424 if self.preprocess is True: 425 for name, model in tqdm(self.regressors): # do parallel exec 426 start = time.time() 427 try: 428 if "random_state" in model().get_params().keys(): 429 pipe = Pipeline( 430 steps=[ 431 ("preprocessor", preprocessor), 432 ( 433 "regressor", 434 DeepMTS( 435 obj=model( 436 random_state=self.random_state, 437 **kwargs, 438 ), 439 n_layers=self.n_layers, 440 n_hidden_features=self.n_hidden_features, 441 activation_name=self.activation_name, 442 a=self.a, 443 nodes_sim=self.nodes_sim, 444 bias=self.bias, 445 dropout=self.dropout, 446 
direct_link=self.direct_link, 447 n_clusters=self.n_clusters, 448 cluster_encode=self.cluster_encode, 449 type_clust=self.type_clust, 450 type_scaling=self.type_scaling, 451 lags=self.lags, 452 type_pi=self.type_pi, 453 block_size=self.block_size, 454 replications=self.replications, 455 kernel=self.kernel, 456 agg=self.agg, 457 seed=self.seed, 458 backend=self.backend, 459 show_progress=self.show_progress, 460 ), 461 ), 462 ] 463 ) 464 else: # "random_state" in model().get_params().keys() 465 pipe = Pipeline( 466 steps=[ 467 ("preprocessor", preprocessor), 468 ( 469 "regressor", 470 DeepMTS( 471 obj=model(**kwargs), 472 n_layers=self.n_layers, 473 n_hidden_features=self.n_hidden_features, 474 activation_name=self.activation_name, 475 a=self.a, 476 nodes_sim=self.nodes_sim, 477 bias=self.bias, 478 dropout=self.dropout, 479 direct_link=self.direct_link, 480 n_clusters=self.n_clusters, 481 cluster_encode=self.cluster_encode, 482 type_clust=self.type_clust, 483 type_scaling=self.type_scaling, 484 lags=self.lags, 485 type_pi=self.type_pi, 486 block_size=self.block_size, 487 replications=self.replications, 488 kernel=self.kernel, 489 agg=self.agg, 490 seed=self.seed, 491 backend=self.backend, 492 show_progress=self.show_progress, 493 ), 494 ), 495 ] 496 ) 497 498 pipe.fit(X_train, **kwargs) 499 # pipe.fit(X_train, xreg=xreg) 500 501 self.models_[name] = pipe 502 503 if self.h is None: 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 else: 506 assert self.h > 0, "h must be > 0" 507 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 508 509 if (self.replications is not None) or ( 510 self.type_pi == "gaussian" 511 ): 512 rmse = mean_errors( 513 actual=X_test, 514 pred=X_pred, 515 scoring="root_mean_squared_error", 516 per_series=per_series, 517 ) 518 mae = mean_errors( 519 actual=X_test, 520 pred=X_pred, 521 scoring="mean_absolute_error", 522 per_series=per_series, 523 ) 524 mpl = mean_errors( 525 actual=X_test, 526 pred=X_pred, 527 
scoring="mean_pinball_loss", 528 per_series=per_series, 529 ) 530 winklerscore = winkler_score( 531 obj=X_pred, 532 actual=X_test, 533 level=95, 534 per_series=per_series, 535 ) 536 coveragecalc = coverage( 537 X_pred, X_test, level=95, per_series=per_series 538 ) 539 else: 540 rmse = mean_errors( 541 actual=X_test, 542 pred=X_pred, 543 scoring="root_mean_squared_error", 544 per_series=per_series, 545 ) 546 mae = mean_errors( 547 actual=X_test, 548 pred=X_pred, 549 scoring="mean_absolute_error", 550 per_series=per_series, 551 ) 552 mpl = mean_errors( 553 actual=X_test, 554 pred=X_pred, 555 scoring="mean_pinball_loss", 556 per_series=per_series, 557 ) 558 559 names.append(name) 560 RMSE.append(rmse) 561 MAE.append(mae) 562 MPL.append(mpl) 563 564 if (self.replications is not None) or ( 565 self.type_pi == "gaussian" 566 ): 567 WINKLERSCORE.append(winklerscore) 568 COVERAGE.append(coveragecalc) 569 TIME.append(time.time() - start) 570 571 if self.custom_metric is not None: 572 try: 573 custom_metric = self.custom_metric(X_test, X_pred) 574 CUSTOM_METRIC.append(custom_metric) 575 except Exception as e: 576 custom_metric = np.iinfo(np.float32).max 577 CUSTOM_METRIC.append(custom_metric) 578 579 if self.verbose > 0: 580 if (self.replications is not None) or ( 581 self.type_pi == "gaussian" 582 ): 583 scores_verbose = { 584 "Model": name, 585 "RMSE": rmse, 586 "MAE": mae, 587 "MPL": mpl, 588 "WINKLERSCORE": winklerscore, 589 "COVERAGE": coveragecalc, 590 "Time taken": time.time() - start, 591 } 592 else: 593 scores_verbose = { 594 "Model": name, 595 "RMSE": rmse, 596 "MAE": mae, 597 "MPL": mpl, 598 "Time taken": time.time() - start, 599 } 600 601 if self.custom_metric is not None: 602 scores_verbose["Custom metric"] = custom_metric 603 604 if self.predictions: 605 predictions[name] = X_pred 606 except Exception as exception: 607 if self.ignore_warnings is False: 608 print(name + " model failed to execute") 609 print(exception) 610 611 else: # no preprocessing 612 for 
name, model in tqdm(self.regressors): # do parallel exec 613 start = time.time() 614 try: 615 if "random_state" in model().get_params().keys(): 616 pipe = DeepMTS( 617 obj=model(random_state=self.random_state, **kwargs), 618 n_layers=self.n_layers, 619 n_hidden_features=self.n_hidden_features, 620 activation_name=self.activation_name, 621 a=self.a, 622 nodes_sim=self.nodes_sim, 623 bias=self.bias, 624 dropout=self.dropout, 625 direct_link=self.direct_link, 626 n_clusters=self.n_clusters, 627 cluster_encode=self.cluster_encode, 628 type_clust=self.type_clust, 629 type_scaling=self.type_scaling, 630 lags=self.lags, 631 type_pi=self.type_pi, 632 block_size=self.block_size, 633 replications=self.replications, 634 kernel=self.kernel, 635 agg=self.agg, 636 seed=self.seed, 637 backend=self.backend, 638 show_progress=self.show_progress, 639 ) 640 else: 641 pipe = DeepMTS( 642 obj=model(**kwargs), 643 n_layers=self.n_layers, 644 n_hidden_features=self.n_hidden_features, 645 activation_name=self.activation_name, 646 a=self.a, 647 nodes_sim=self.nodes_sim, 648 bias=self.bias, 649 dropout=self.dropout, 650 direct_link=self.direct_link, 651 n_clusters=self.n_clusters, 652 cluster_encode=self.cluster_encode, 653 type_clust=self.type_clust, 654 type_scaling=self.type_scaling, 655 lags=self.lags, 656 type_pi=self.type_pi, 657 block_size=self.block_size, 658 replications=self.replications, 659 kernel=self.kernel, 660 agg=self.agg, 661 seed=self.seed, 662 backend=self.backend, 663 show_progress=self.show_progress, 664 ) 665 666 pipe.fit(X_train, xreg, **kwargs) 667 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 668 669 self.models_[name] = pipe 670 671 if self.preprocess is True: 672 if self.h is None: 673 X_pred = pipe["regressor"].predict( 674 h=X_test.shape[0], **kwargs 675 ) 676 else: 677 assert ( 678 self.h > 0 and self.h <= X_test.shape[0] 679 ), "h must be > 0 and < X_test.shape[0]" 680 X_pred = pipe["regressor"].predict( 681 h=self.h, **kwargs 682 ) 683 684 else: 
685 if self.h is None: 686 X_pred = pipe.predict( 687 h=X_test.shape[0], 688 **kwargs, 689 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 690 ) 691 else: 692 assert ( 693 self.h > 0 and self.h <= X_test.shape[0] 694 ), "h must be > 0 and < X_test.shape[0]" 695 X_pred = pipe.predict(h=self.h, **kwargs) 696 697 if self.h is None: 698 if (self.replications is not None) or ( 699 self.type_pi == "gaussian" 700 ): 701 rmse = mean_errors( 702 actual=X_test, 703 pred=X_pred.mean, 704 scoring="root_mean_squared_error", 705 per_series=per_series, 706 ) 707 mae = mean_errors( 708 actual=X_test, 709 pred=X_pred.mean, 710 scoring="mean_absolute_error", 711 per_series=per_series, 712 ) 713 mpl = mean_errors( 714 actual=X_test, 715 pred=X_pred.mean, 716 scoring="mean_pinball_loss", 717 per_series=per_series, 718 ) 719 winklerscore = winkler_score( 720 obj=X_pred, 721 actual=X_test, 722 level=95, 723 per_series=per_series, 724 ) 725 coveragecalc = coverage( 726 X_pred, X_test, level=95, per_series=per_series 727 ) 728 else: # no prediction interval 729 rmse = mean_errors( 730 actual=X_test, 731 pred=X_pred, 732 scoring="root_mean_squared_error", 733 per_series=per_series, 734 ) 735 mae = mean_errors( 736 actual=X_test, 737 pred=X_pred, 738 scoring="mean_absolute_error", 739 per_series=per_series, 740 ) 741 mpl = mean_errors( 742 actual=X_test, 743 pred=X_pred, 744 scoring="mean_pinball_loss", 745 per_series=per_series, 746 ) 747 else: # self.h is not None 748 if (self.replications is not None) or ( 749 self.type_pi == "gaussian" 750 ): 751 if isinstance(X_test, pd.DataFrame): 752 X_test_h = X_test.iloc[0: self.h, :] 753 rmse = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="root_mean_squared_error", 757 per_series=per_series, 758 ) 759 mae = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_absolute_error", 763 per_series=per_series, 764 ) 765 mpl = mean_errors( 766 actual=X_test_h, 767 pred=X_pred, 768 
scoring="mean_pinball_loss", 769 per_series=per_series, 770 ) 771 winklerscore = winkler_score( 772 obj=X_pred, 773 actual=X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 coveragecalc = coverage( 778 X_pred, 779 X_test_h, 780 level=95, 781 per_series=per_series, 782 ) 783 else: 784 X_test_h = X_test[0: self.h, :] 785 rmse = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="root_mean_squared_error", 789 per_series=per_series, 790 ) 791 mae = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_absolute_error", 795 per_series=per_series, 796 ) 797 mpl = mean_errors( 798 actual=X_test_h, 799 pred=X_pred, 800 scoring="mean_pinball_loss", 801 per_series=per_series, 802 ) 803 winklerscore = winkler_score( 804 obj=X_pred, 805 actual=X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 coveragecalc = coverage( 810 X_pred, 811 X_test_h, 812 level=95, 813 per_series=per_series, 814 ) 815 else: # no prediction interval 816 if isinstance(X_test, pd.DataFrame): 817 X_test_h = X_test.iloc[0: self.h, :] 818 rmse = mean_errors( 819 actual=X_test_h, 820 pred=X_pred, 821 scoring="root_mean_squared_error", 822 per_series=per_series, 823 ) 824 mae = mean_errors( 825 actual=X_test_h, 826 pred=X_pred, 827 scoring="mean_absolute_error", 828 per_series=per_series, 829 ) 830 mpl = mean_errors( 831 actual=X_test_h, 832 pred=X_pred, 833 scoring="mean_pinball_loss", 834 per_series=per_series, 835 ) 836 else: 837 X_test_h = X_test[0: self.h, :] 838 rmse = mean_errors( 839 actual=X_test_h, 840 pred=X_pred, 841 scoring="root_mean_squared_error", 842 per_series=per_series, 843 ) 844 mae = mean_errors( 845 actual=X_test_h, 846 pred=X_pred, 847 scoring="mean_absolute_error", 848 per_series=per_series, 849 ) 850 851 names.append(name) 852 RMSE.append(rmse) 853 MAE.append(mae) 854 MPL.append(mpl) 855 if (self.replications is not None) or ( 856 self.type_pi == "gaussian" 857 ): 858 WINKLERSCORE.append(winklerscore) 859 COVERAGE.append(coveragecalc) 
860 TIME.append(time.time() - start) 861 862 if self.custom_metric is not None: 863 try: 864 if self.h is None: 865 custom_metric = self.custom_metric( 866 X_test, X_pred 867 ) 868 else: 869 custom_metric = self.custom_metric( 870 X_test_h, X_pred 871 ) 872 CUSTOM_METRIC.append(custom_metric) 873 except Exception as e: 874 custom_metric = np.iinfo(np.float32).max 875 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 876 877 if self.verbose > 0: 878 if (self.replications is not None) or ( 879 self.type_pi == "gaussian" 880 ): 881 scores_verbose = { 882 "Model": name, 883 "RMSE": rmse, 884 "MAE": mae, 885 "MPL": mpl, 886 "WINKLERSCORE": winklerscore, 887 "COVERAGE": coveragecalc, 888 "Time taken": time.time() - start, 889 } 890 else: 891 scores_verbose = { 892 "Model": name, 893 "RMSE": rmse, 894 "MAE": mae, 895 "MPL": mpl, 896 "Time taken": time.time() - start, 897 } 898 899 if self.custom_metric is not None: 900 scores_verbose["Custom metric"] = custom_metric 901 902 if self.predictions: 903 predictions[name] = X_pred 904 905 except Exception as exception: 906 if self.ignore_warnings is False: 907 print(name + " model failed to execute") 908 print(exception) 909 910 if (self.replications is not None) or (self.type_pi == "gaussian"): 911 scores = { 912 "Model": names, 913 "RMSE": RMSE, 914 "MAE": MAE, 915 "MPL": MPL, 916 "WINKLERSCORE": WINKLERSCORE, 917 "COVERAGE": COVERAGE, 918 "Time Taken": TIME, 919 } 920 else: 921 scores = { 922 "Model": names, 923 "RMSE": RMSE, 924 "MAE": MAE, 925 "MPL": MPL, 926 "Time Taken": TIME, 927 } 928 929 if self.custom_metric is not None: 930 scores["Custom metric"] = CUSTOM_METRIC 931 932 if per_series: 933 scores = dict_to_dataframe_series(scores, self.series_names) 934 else: 935 scores = pd.DataFrame(scores) 936 937 try: # case per_series, can't be sorted 938 scores = scores.sort_values( 939 by=self.sort_by, ascending=True 940 ).set_index("Model") 941 942 self.best_model_ = self.models_[scores.index[0]] 943 except Exception as e: 944 
pass 945 946 if self.predictions is True: 947 return scores, predictions 948 949 return scores
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like or data frame,
Testing vectors, where rows is the number of samples
and columns is the number of features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: Pandas DataFrame
Returns predictions of all the models in a Pandas DataFrame.
963 def provide_models(self, X_train, X_test): 964 """ 965 This function returns all the model objects trained in fit function. 966 If fit is not called already, then we call fit and then return the models. 967 968 Parameters: 969 970 X_train : array-like, 971 Training vectors, where rows is the number of samples 972 and columns is the number of features. 973 974 X_test : array-like, 975 Testing vectors, where rows is the number of samples 976 and columns is the number of features. 977 978 Returns: 979 980 models: dict-object, 981 Returns a dictionary with each model pipeline as value 982 with key as name of models. 983 984 """ 985 if self.h is None: 986 if len(self.models_.keys()) == 0: 987 self.fit(X_train, X_test) 988 else: 989 if len(self.models_.keys()) == 0: 990 if isinstance(X_test, pd.DataFrame): 991 self.fit(X_train, X_test.iloc[0: self.h, :]) 992 else: 993 self.fit(X_train, X_test[0: self.h, :]) 994 995 return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class MLARCH:
    """Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)

    Decomposes the series as y_t = mu_t + z_t * sigma_t: a mean model,
    a volatility model fit on lagged squared residuals (log scale), and
    a model for the standardized residuals z_t.

    Parameters
    ----------
    model_mean : object
        Model for mean component
    model_sigma : object
        Model for volatility component (sklearn regressor)
    model_residuals : object
        Model for standardized residuals
    lags_vol : int, default=10
        Number of lags for squared residuals in volatility model
    """

    def __init__(self, model_mean, model_sigma, model_residuals, lags_vol=10):
        # the three sub-models of the decomposition, stored as given
        self.model_mean = model_mean
        self.model_sigma = model_sigma
        self.model_residuals = model_residuals
        self.lags_vol = lags_vol

    def _create_lags(self, y, lags):
        """Create lagged feature matrix of shape (n - lags, lags)."""
        n = len(y)
        if n <= lags:
            raise ValueError(f"Series length {n} must be > lags {lags}")
        X = np.zeros((n - lags, lags))
        for i in range(lags):
            # column i holds y shifted so row t sees y[t .. t+lags-1]
            X[:, i] = y[i: (n - lags + i)]
        return X

    def fit(self, y, **kwargs):
        """Fit the MLARCH model

        Parameters
        ----------
        y : array-like
            Target time series (should be stationary, e.g., returns)

        Returns
        -------
        self
        """
        # Format input: accept Series/DataFrame and flatten to 1-D
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values
        y = y.ravel()

        # lags_vol lagged rows plus a margin of observations to train on
        if len(y) < self.lags_vol + 20:
            raise ValueError(f"Need at least {self.lags_vol + 20} observations")

        # Step 1: Fit mean model
        # NOTE(review): assumes model_mean exposes `residuals_` after fit — confirm
        self.model_mean.fit(y.reshape(-1, 1))
        mean_residuals = self.model_mean.residuals_.ravel()

        # Step 2: Fit ARCH volatility model on lagged squared residuals
        # (log target keeps exp(prediction) positive; 1e-8 avoids log(0))
        resid_squared = mean_residuals**2
        X_vol = self._create_lags(resid_squared, self.lags_vol)
        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)

        self.model_sigma.fit(X_vol, y_vol)

        # Get fitted volatility (exp undoes the log transform)
        fitted_log_sigma = self.model_sigma.predict(X_vol)
        fitted_sigma = np.exp(fitted_log_sigma)

        # Step 3: Compute standardized residuals with proper scaling
        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
            fitted_sigma
        )

        # Enforce zero mean and unit variance
        self.z_mean_ = np.mean(standardized_residuals)
        self.z_std_ = np.std(standardized_residuals)
        standardized_residuals = (
            standardized_residuals - self.z_mean_
        ) / self.z_std_

        # Step 4: Fit residuals model
        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

        # Store for prediction (seeds the recursive volatility forecast)
        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]

        # Store diagnostics
        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))

        return self

    def predict(self, h=5, level=95, return_sims=False):
        """Predict future values

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        return_sims : bool
            If True, return full simulation paths

        Returns
        -------
        DescribeResult
            Named tuple with mean, sims, lower, upper
        """
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        # Get mean forecast
        # NOTE(review): assumes model_mean.predict(h=...) returns an object
        # with a `.values` attribute (e.g. a DataFrame-like result) — confirm
        mean_forecast = self.model_mean.predict(h=h).values.ravel()

        # Recursive ARCH volatility forecasting
        sigma_forecast = np.zeros(h)
        current_lags = self.last_residuals_squared_.copy()

        for i in range(h):
            X_t = current_lags.reshape(1, -1)
            log_sigma_t = self.model_sigma.predict(X_t)[0]
            sigma_forecast[i] = np.exp(log_sigma_t)
            # Update lags with predicted variance
            current_lags = np.append(current_lags[1:], sigma_forecast[i])

        # Predict standardized residuals and rescale
        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_

        # Combine: μ + z × σ
        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)

        # Generate prediction intervals
        sims = None
        if return_sims:
            preds_z_for_sims = self.model_residuals.predict(h=h)
            if hasattr(preds_z_for_sims, "sims") and isinstance(
                preds_z_for_sims.sims, pd.DataFrame
            ):
                sims_z_normalized = preds_z_for_sims.sims
                n_sims = sims_z_normalized.shape[1]

                sims = np.zeros((h, n_sims))
                for sim_idx in range(n_sims):
                    # Rescale simulations back to the residual scale
                    z_sim = (
                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
                        + self.z_mean_
                    )
                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
                        sigma_forecast
                    )

                # empirical interval bounds from the simulated paths
                alpha = 1 - level / 100
                lower_bound = np.quantile(sims, alpha / 2, axis=1)
                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
            else:
                # Fallback to Gaussian (residual model has no usable sims)
                z_score = norm.ppf(1 - (1 - level / 100) / 2)
                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
                lower_bound = point_forecast - margin
                upper_bound = point_forecast + margin
        else:
            # Gaussian intervals with proper scaling
            z_score = norm.ppf(1 - (1 - level / 100) / 2)
            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
            lower_bound = point_forecast - margin
            upper_bound = point_forecast + margin

        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Machine Learning-agnostic ARCH for nearly-stationary time series (e.g., returns)
Parameters
model_mean : object Model for mean component model_sigma : object Model for volatility component (sklearn regressor) model_residuals : object Model for standardized residuals lags_vol : int, default=10 Number of lags for squared residuals in volatility model
    def fit(self, y, **kwargs):
        """Fit the MLARCH model

        Parameters
        ----------
        y : array-like
            Target time series (should be stationary, e.g., returns)

        Returns
        -------
        self
        """
        # Format input: flatten pandas containers to a 1-D numpy array
        if isinstance(y, (pd.Series, pd.DataFrame)):
            y = y.values
        y = y.ravel()

        # Require enough observations beyond the volatility lags to fit
        # all three component models
        if len(y) < self.lags_vol + 20:
            raise ValueError(f"Need at least {self.lags_vol + 20} observations")

        # Step 1: Fit mean model
        self.model_mean.fit(y.reshape(-1, 1))
        mean_residuals = self.model_mean.residuals_.ravel()

        # Step 2: Fit ARCH volatility model on lagged squared residuals.
        # The target is the log of squared residuals (+ small epsilon to
        # avoid log(0)); exponentiating predictions keeps variances positive.
        resid_squared = mean_residuals**2
        X_vol = self._create_lags(resid_squared, self.lags_vol)
        y_vol = np.log(resid_squared[self.lags_vol:] + 1e-8)

        self.model_sigma.fit(X_vol, y_vol)

        # Get fitted volatility (variance scale, via exp of log-predictions)
        fitted_log_sigma = self.model_sigma.predict(X_vol)
        fitted_sigma = np.exp(fitted_log_sigma)

        # Step 3: Compute standardized residuals with proper scaling
        standardized_residuals = mean_residuals[self.lags_vol:] / np.sqrt(
            fitted_sigma
        )

        # Enforce zero mean and unit variance; the moments are stored so
        # forecasts can be rescaled back in predict()
        self.z_mean_ = np.mean(standardized_residuals)
        self.z_std_ = np.std(standardized_residuals)
        standardized_residuals = (
            standardized_residuals - self.z_mean_
        ) / self.z_std_

        # Step 4: Fit residuals model
        self.model_residuals.fit(standardized_residuals.reshape(-1, 1))

        # Store the most recent squared residuals for recursive prediction
        self.last_residuals_squared_ = resid_squared[-self.lags_vol:]

        # Store diagnostics
        self.fitted_volatility_mean_ = np.mean(np.sqrt(fitted_sigma))
        self.fitted_volatility_std_ = np.std(np.sqrt(fitted_sigma))

        return self
Fit the MLARCH model
Parameters
y : array-like Target time series (should be stationary, e.g., returns)
Returns
self
    def predict(self, h=5, level=95, return_sims=False):
        """Predict future values

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        return_sims : bool
            If True, return full simulation paths

        Returns
        -------
        DescribeResult
            Named tuple with mean, sims, lower, upper
        """
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        # Get mean forecast
        mean_forecast = self.model_mean.predict(h=h).values.ravel()

        # Recursive ARCH volatility forecasting: each step's predicted
        # variance is fed back in as the newest squared-residual lag
        sigma_forecast = np.zeros(h)
        current_lags = self.last_residuals_squared_.copy()

        for i in range(h):
            X_t = current_lags.reshape(1, -1)
            log_sigma_t = self.model_sigma.predict(X_t)[0]
            sigma_forecast[i] = np.exp(log_sigma_t)
            # Update lags with predicted variance (drop the oldest lag)
            current_lags = np.append(current_lags[1:], sigma_forecast[i])

        # Predict standardized residuals and rescale (undo the zero-mean /
        # unit-variance normalization applied during fit)
        z_forecast_normalized = self.model_residuals.predict(h=h).values.ravel()
        z_forecast = z_forecast_normalized * self.z_std_ + self.z_mean_

        # Combine: mu + z * sigma
        point_forecast = mean_forecast + z_forecast * np.sqrt(sigma_forecast)

        # Generate prediction intervals
        sims = None
        if return_sims:
            preds_z_for_sims = self.model_residuals.predict(h=h)
            # Simulation paths are only usable when the residuals model
            # exposes a `sims` DataFrame of shape (h, n_sims)
            if hasattr(preds_z_for_sims, "sims") and isinstance(
                preds_z_for_sims.sims, pd.DataFrame
            ):
                sims_z_normalized = preds_z_for_sims.sims
                n_sims = sims_z_normalized.shape[1]

                sims = np.zeros((h, n_sims))
                for sim_idx in range(n_sims):
                    # Rescale simulations
                    z_sim = (
                        sims_z_normalized.iloc[:, sim_idx].values * self.z_std_
                        + self.z_mean_
                    )
                    sims[:, sim_idx] = mean_forecast + z_sim * np.sqrt(
                        sigma_forecast
                    )

                # Empirical interval bounds from the simulated paths
                alpha = 1 - level / 100
                lower_bound = np.quantile(sims, alpha / 2, axis=1)
                upper_bound = np.quantile(sims, 1 - alpha / 2, axis=1)
            else:
                # Fallback to Gaussian
                z_score = norm.ppf(1 - (1 - level / 100) / 2)
                margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
                lower_bound = point_forecast - margin
                upper_bound = point_forecast + margin
        else:
            # Gaussian intervals with proper scaling
            z_score = norm.ppf(1 - (1 - level / 100) / 2)
            margin = z_score * np.sqrt(sigma_forecast) * self.z_std_
            lower_bound = point_forecast - margin
            upper_bound = point_forecast + margin

        return DescribeResult(point_forecast, sims, lower_bound, upper_bound)
Predict future values
Parameters
h : int Forecast horizon level : int Confidence level for prediction intervals return_sims : bool If True, return full simulation paths
Returns
DescribeResult Named tuple with mean, sims, lower, upper
class MedianVotingRegressor(VotingRegressor):
    """Voting regressor that aggregates base predictions with the median.

    Identical to ``VotingRegressor`` except that ``predict`` returns the
    element-wise median of the fitted base regressors' predictions instead
    of their average, making the ensemble robust to a single outlier model.
    """

    def predict(self, X):
        """
        Predict using the median of the base regressors' predictions.

        Parameters:
            X (array-like): Feature matrix for predictions.

        Returns:
            y_pred (array): Median of predictions from the base regressors.
        """
        # Collect each fitted estimator's predictions, then take the
        # element-wise median across estimators.
        stacked = []
        for estimator in self.estimators_:
            stacked.append(estimator.predict(X))
        return np.median(np.asarray(stacked), axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
For a detailed example, refer to
:ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py`.
Read more in the :ref:`User Guide <voting_regressor>`.
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_. An estimator can be set to 'drop' using
set_params().
*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : ~sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during :term:fit. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during :term:fit. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8  8.4 12.5 17.8 26.  34. ]
In the following example, we drop the 'lr' estimator with
~VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
    def predict(self, X):
        """
        Predict using the median of the base regressors' predictions.

        Parameters:
            X (array-like): Feature matrix for predictions.

        Returns:
            y_pred (array): Median of predictions from the base regressors.
        """
        # Stack every fitted base regressor's predictions into a
        # (n_estimators, n_samples) array
        predictions = np.asarray(
            [regressor.predict(X) for regressor in self.estimators_]
        )
        # Element-wise median across estimators (robust to outlier models)
        return np.median(predictions, axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
31class MTS(Base): 32 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 33 34 Parameters: 35 36 obj: object. 37 any object containing a method fit (obj.fit()) and a method predict 38 (obj.predict()). 39 40 n_hidden_features: int. 41 number of nodes in the hidden layer. 42 43 activation_name: str. 44 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 45 46 a: float. 47 hyperparameter for 'prelu' or 'elu' activation function. 48 49 nodes_sim: str. 50 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 51 'uniform'. 52 53 bias: boolean. 54 indicates if the hidden layer contains a bias term (True) or not 55 (False). 56 57 dropout: float. 58 regularization parameter; (random) percentage of nodes dropped out 59 of the training. 60 61 direct_link: boolean. 62 indicates if the original predictors are included (True) in model's fitting or not (False). 63 64 n_clusters: int. 65 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 66 67 cluster_encode: bool. 68 defines how the variable containing clusters is treated (default is one-hot) 69 if `False`, then labels are used, without one-hot encoding. 70 71 type_clust: str. 72 type of clustering method: currently k-means ('kmeans') or Gaussian 73 Mixture Model ('gmm'). 74 75 type_scaling: a tuple of 3 strings. 76 scaling methods for inputs, hidden layer, and clustering respectively 77 (and when relevant). 78 Currently available: standardization ('std') or MinMax scaling ('minmax'). 79 80 lags: int. 81 number of lags used for each time series. 82 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 83 84 type_pi: str. 
85 type of prediction interval; currently: 86 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 87 - "quantile": use model-agnostic quantile regression under the hood 88 - "kde": based on Kernel Density Estimation of in-sample residuals 89 - "bootstrap": based on independent bootstrap of in-sample residuals 90 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 91 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 92 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 93 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 94 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 95 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 96 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 97 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 98 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 99 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 100 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 101 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 102 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 103 104 level: int. 105 level of confidence for `type_pi == 'quantile'` (default is `95`) 106 107 block_size: int. 
108 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 109 Default is round(3.15*(n_residuals^1/3)) 110 111 replications: int. 112 number of replications (if needed, for predictive simulation). Default is 'None'. 113 114 kernel: str. 115 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 116 117 agg: str. 118 either "mean" or "median" for simulation of bootstrap aggregating 119 120 seed: int. 121 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 122 123 backend: str. 124 "cpu" or "gpu" or "tpu". 125 126 verbose: int. 127 0: not printing; 1: printing 128 129 show_progress: bool. 130 True: progress bar when fitting each series; False: no progress bar when fitting each series 131 132 Attributes: 133 134 fit_objs_: dict 135 objects adjusted to each individual time series 136 137 y_: {array-like} 138 MTS responses (most recent observations first) 139 140 X_: {array-like} 141 MTS lags 142 143 xreg_: {array-like} 144 external regressors 145 146 y_means_: dict 147 a dictionary of each series mean values 148 149 preds_: {array-like} 150 successive model predictions 151 152 preds_std_: {array-like} 153 standard deviation around the predictions for Bayesian base learners (`obj`) 154 155 gaussian_preds_std_: {array-like} 156 standard deviation around the predictions for `type_pi='gaussian'` 157 158 return_std_: boolean 159 return uncertainty or not (set in predict) 160 161 df_: data frame 162 the input data frame, in case a data.frame is provided to `fit` 163 164 n_obs_: int 165 number of time series observations (number of rows for multivariate) 166 167 level_: int 168 level of confidence for prediction intervals (default is 95) 169 170 residuals_: {array-like} 171 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 172 (for `type_pi` in conformal prediction) 173 174 residuals_sims_: tuple of 
{array-like} 175 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 176 calibrated residuals (for `type_pi` in conformal prediction) 177 178 kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 179 180 residuals_std_dev_: residuals standard deviation 181 182 Examples: 183 184 Example 1: 185 186 ```python 187 import nnetsauce as ns 188 import numpy as np 189 from sklearn import linear_model 190 np.random.seed(123) 191 192 M = np.random.rand(10, 3) 193 M[:,0] = 10*M[:,0] 194 M[:,2] = 25*M[:,2] 195 print(M) 196 197 # Adjust Bayesian Ridge 198 regr4 = linear_model.BayesianRidge() 199 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 200 obj_MTS.fit(M) 201 print(obj_MTS.predict()) 202 203 # with credible intervals 204 print(obj_MTS.predict(return_std=True, level=80)) 205 206 print(obj_MTS.predict(return_std=True, level=95)) 207 ``` 208 209 Example 2: 210 211 ```python 212 import nnetsauce as ns 213 import numpy as np 214 from sklearn import linear_model 215 216 dataset = { 217 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 218 'series1' : [34, 30, 35.6, 33.3, 38.1], 219 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 220 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 221 df = pd.DataFrame(dataset).set_index('date') 222 print(df) 223 224 # Adjust Bayesian Ridge 225 regr5 = linear_model.BayesianRidge() 226 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 227 obj_MTS.fit(df) 228 print(obj_MTS.predict()) 229 230 # with credible intervals 231 print(obj_MTS.predict(return_std=True, level=80)) 232 233 print(obj_MTS.predict(return_std=True, level=95)) 234 ``` 235 """ 236 237 # construct the object ----- 238 239 def __init__( 240 self, 241 obj, 242 n_hidden_features=5, 243 activation_name="relu", 244 a=0.01, 245 nodes_sim="sobol", 246 bias=True, 247 dropout=0, 248 direct_link=True, 249 n_clusters=2, 250 cluster_encode=True, 251 type_clust="kmeans", 252 
type_scaling=("std", "std", "std"), 253 lags=1, 254 type_pi="kde", 255 level=95, 256 block_size=None, 257 replications=None, 258 kernel="gaussian", 259 agg="mean", 260 seed=123, 261 backend="cpu", 262 verbose=0, 263 show_progress=True, 264 ): 265 super().__init__( 266 n_hidden_features=n_hidden_features, 267 activation_name=activation_name, 268 a=a, 269 nodes_sim=nodes_sim, 270 bias=bias, 271 dropout=dropout, 272 direct_link=direct_link, 273 n_clusters=n_clusters, 274 cluster_encode=cluster_encode, 275 type_clust=type_clust, 276 type_scaling=type_scaling, 277 seed=seed, 278 backend=backend, 279 ) 280 281 # Add validation for lags parameter 282 if isinstance(lags, str): 283 assert lags in ( 284 "AIC", 285 "AICc", 286 "BIC", 287 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 288 else: 289 assert ( 290 int(lags) == lags 291 ), "if numeric, lags parameter should be an integer" 292 293 self.obj = obj 294 self.n_series = None 295 self.lags = lags 296 self.type_pi = type_pi 297 self.level = level 298 if self.type_pi == "quantile": 299 self.obj = QuantileRegressor( 300 self.obj, level=self.level, scoring="conformal" 301 ) 302 self.block_size = block_size 303 self.replications = replications 304 self.kernel = kernel 305 self.agg = agg 306 self.verbose = verbose 307 self.show_progress = show_progress 308 self.series_names = ["series0"] 309 self.input_dates = None 310 self.quantiles = None 311 self.fit_objs_ = {} 312 self.y_ = None # MTS responses (most recent observations first) 313 self.X_ = None # MTS lags 314 self.xreg_ = None 315 self.y_means_ = {} 316 self.mean_ = None 317 self.median_ = None 318 self.upper_ = None 319 self.lower_ = None 320 self.output_dates_ = None 321 self.preds_std_ = [] 322 self.gaussian_preds_std_ = None 323 self.alpha_ = None 324 self.return_std_ = None 325 self.df_ = None 326 self.residuals_ = [] 327 self.abs_calib_residuals_ = None 328 self.calib_residuals_quantile_ = None 329 self.residuals_sims_ = None 330 self.kde_ = None 331 
self.sims_ = None 332 self.residuals_std_dev_ = None 333 self.n_obs_ = None 334 self.level_ = None 335 self.init_n_series_ = None 336 337 def fit(self, X, xreg=None, **kwargs): 338 """Fit MTS model to training data X, with optional regressors xreg 339 340 Parameters: 341 342 X: {array-like}, shape = [n_samples, n_features] 343 Training time series, where n_samples is the number 344 of samples and n_features is the number of features; 345 X must be in increasing order (most recent observations last) 346 347 xreg: {array-like}, shape = [n_samples, n_features_xreg] 348 Additional (external) regressors to be passed to self.obj 349 xreg must be in 'increasing' order (most recent observations last) 350 351 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 352 353 Returns: 354 355 self: object 356 """ 357 try: 358 self.init_n_series_ = X.shape[1] 359 except IndexError as e: 360 self.init_n_series_ = 1 361 362 # Automatic lag selection if requested 363 if isinstance(self.lags, str): 364 max_lags = min(25, X.shape[0] // 4) 365 best_ic = float("inf") 366 best_lags = 1 367 368 if self.verbose: 369 print( 370 f"\nSelecting optimal number of lags using {self.lags}..." 
371 ) 372 iterator = tqdm(range(1, max_lags + 1)) 373 else: 374 iterator = range(1, max_lags + 1) 375 376 for lag in iterator: 377 # Convert DataFrame to numpy array before reversing 378 if isinstance(X, pd.DataFrame): 379 X_values = X.values[::-1] 380 else: 381 X_values = X[::-1] 382 383 # Try current lag value 384 if self.init_n_series_ > 1: 385 mts_input = ts.create_train_inputs(X_values, lag) 386 else: 387 mts_input = ts.create_train_inputs( 388 X_values.reshape(-1, 1), lag 389 ) 390 391 # Cook training set and fit model 392 dummy_y, scaled_Z = self.cook_training_set( 393 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 394 ) 395 residuals_ = [] 396 397 for i in range(self.init_n_series_): 398 y_mean = np.mean(mts_input[0][:, i]) 399 centered_y_i = mts_input[0][:, i] - y_mean 400 self.obj.fit(X=scaled_Z, y=centered_y_i) 401 residuals_.append( 402 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 403 ) 404 405 self.residuals_ = np.asarray(residuals_).T 406 ic = self._compute_information_criterion( 407 curr_lags=lag, criterion=self.lags 408 ) 409 410 if self.verbose: 411 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 412 413 if ic < best_ic: 414 best_ic = ic 415 best_lags = lag 416 417 if self.verbose: 418 print( 419 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 420 ) 421 422 self.lags = best_lags 423 424 self.input_dates = None 425 self.df_ = None 426 427 if isinstance(X, pd.DataFrame) is False: 428 # input data set is a numpy array 429 if xreg is None: 430 X = pd.DataFrame(X) 431 self.series_names = [ 432 "series" + str(i) for i in range(X.shape[1]) 433 ] 434 else: 435 # xreg is not None 436 X = mo.cbind(X, xreg) 437 self.xreg_ = xreg 438 439 else: # input data set is a DataFrame with column names 440 X_index = None 441 if X.index is not None: 442 X_index = X.index 443 if xreg is None: 444 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 445 else: 446 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 447 self.xreg_ = xreg 448 if 
X_index is not None: 449 X.index = X_index 450 self.series_names = X.columns.tolist() 451 452 if isinstance(X, pd.DataFrame): 453 if self.df_ is None: 454 self.df_ = X 455 X = X.values 456 else: 457 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 458 frequency = pd.infer_freq(input_dates_prev) 459 self.df_ = pd.concat([self.df_, X], axis=0) 460 self.input_dates = pd.date_range( 461 start=input_dates_prev[0], 462 periods=len(input_dates_prev) + X.shape[0], 463 freq=frequency, 464 ).values.tolist() 465 self.df_.index = self.input_dates 466 X = self.df_.values 467 self.df_.columns = self.series_names 468 else: 469 if self.df_ is None: 470 self.df_ = pd.DataFrame(X, columns=self.series_names) 471 else: 472 self.df_ = pd.concat( 473 [self.df_, pd.DataFrame(X, columns=self.series_names)], 474 axis=0, 475 ) 476 477 self.input_dates = ts.compute_input_dates(self.df_) 478 479 try: 480 # multivariate time series 481 n, p = X.shape 482 except: 483 # univariate time series 484 n = X.shape[0] 485 p = 1 486 self.n_obs_ = n 487 488 rep_1_n = np.repeat(1, n) 489 490 self.y_ = None 491 self.X_ = None 492 self.n_series = p 493 self.fit_objs_.clear() 494 self.y_means_.clear() 495 residuals_ = [] 496 self.residuals_ = None 497 self.residuals_sims_ = None 498 self.kde_ = None 499 self.sims_ = None 500 self.scaled_Z_ = None 501 self.centered_y_is_ = [] 502 503 if self.init_n_series_ > 1: 504 # multivariate time series 505 mts_input = ts.create_train_inputs(X[::-1], self.lags) 506 else: 507 # univariate time series 508 mts_input = ts.create_train_inputs( 509 X.reshape(-1, 1)[::-1], self.lags 510 ) 511 512 self.y_ = mts_input[0] 513 514 self.X_ = mts_input[1] 515 516 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 517 518 self.scaled_Z_ = scaled_Z 519 520 # loop on all the time series and adjust self.obj.fit 521 if self.verbose > 0: 522 print( 523 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... 
\n" 524 ) 525 526 if self.show_progress is True: 527 iterator = tqdm(range(self.init_n_series_)) 528 else: 529 iterator = range(self.init_n_series_) 530 531 if self.type_pi in ( 532 "gaussian", 533 "kde", 534 "bootstrap", 535 "block-bootstrap", 536 ) or self.type_pi.startswith("vine"): 537 for i in iterator: 538 y_mean = np.mean(self.y_[:, i]) 539 self.y_means_[i] = y_mean 540 centered_y_i = self.y_[:, i] - y_mean 541 self.centered_y_is_.append(centered_y_i) 542 self.obj.fit(X=scaled_Z, y=centered_y_i) 543 self.fit_objs_[i] = deepcopy(self.obj) 544 residuals_.append( 545 ( 546 centered_y_i - self.fit_objs_[i].predict(scaled_Z) 547 ).tolist() 548 ) 549 550 if self.type_pi == "quantile": 551 for i in iterator: 552 y_mean = np.mean(self.y_[:, i]) 553 self.y_means_[i] = y_mean 554 centered_y_i = self.y_[:, i] - y_mean 555 self.centered_y_is_.append(centered_y_i) 556 self.obj.fit(X=scaled_Z, y=centered_y_i) 557 self.fit_objs_[i] = deepcopy(self.obj) 558 559 if self.type_pi.startswith("scp"): 560 # split conformal prediction 561 for i in iterator: 562 n_y = self.y_.shape[0] 563 n_y_half = n_y // 2 564 first_half_idx = range(0, n_y_half) 565 second_half_idx = range(n_y_half, n_y) 566 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 567 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 568 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 569 # calibrated residuals actually 570 residuals_.append( 571 ( 572 self.y_[second_half_idx, i] 573 - ( 574 y_mean_temp 575 + self.obj.predict(scaled_Z[second_half_idx, :]) 576 ) 577 ).tolist() 578 ) 579 # fit on the second half 580 y_mean = np.mean(self.y_[second_half_idx, i]) 581 self.y_means_[i] = y_mean 582 centered_y_i = self.y_[second_half_idx, i] - y_mean 583 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 584 self.fit_objs_[i] = deepcopy(self.obj) 585 586 self.residuals_ = np.asarray(residuals_).T 587 588 if self.type_pi == "gaussian": 589 self.gaussian_preds_std_ = np.std(self.residuals_, 
axis=0) 590 591 if self.type_pi.startswith("scp2"): 592 # Calculate mean and standard deviation for each column 593 data_mean = np.mean(self.residuals_, axis=0) 594 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 595 # Center and scale the array using broadcasting 596 self.residuals_ = ( 597 self.residuals_ - data_mean[np.newaxis, :] 598 ) / self.residuals_std_dev_[np.newaxis, :] 599 600 if self.replications != None and "kde" in self.type_pi: 601 if self.verbose > 0: 602 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 603 assert self.kernel in ( 604 "gaussian", 605 "tophat", 606 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 607 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 608 grid = GridSearchCV( 609 KernelDensity(kernel=self.kernel, **kwargs), 610 param_grid=kernel_bandwidths, 611 ) 612 grid.fit(self.residuals_) 613 614 if self.verbose > 0: 615 print( 616 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 617 ) 618 619 self.kde_ = grid.best_estimator_ 620 621 return self 622 623 def partial_fit(self, X, xreg=None, **kwargs): 624 """partial_fit MTS model to training data X, with optional regressors xreg 625 626 Parameters: 627 628 X: {array-like}, shape = [n_samples, n_features] 629 Training time series, where n_samples is the number 630 of samples and n_features is the number of features; 631 X must be in increasing order (most recent observations last) 632 633 xreg: {array-like}, shape = [n_samples, n_features_xreg] 634 Additional (external) regressors to be passed to self.obj 635 xreg must be in 'increasing' order (most recent observations last) 636 637 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 638 639 Returns: 640 641 self: object 642 """ 643 try: 644 self.init_n_series_ = X.shape[1] 645 except IndexError as e: 646 self.init_n_series_ = 1 647 648 # Automatic lag selection if requested 649 
        # --- (continuation of fit) optional automatic lag selection ---
        # When self.lags is a string ("AIC"/"AICc"/"BIC"), search 1..max_lags
        # and keep the lag count minimizing that information criterion.
        if isinstance(self.lags, str):
            max_lags = min(25, X.shape[0] // 4)
            best_ic = float("inf")
            best_lags = 1

            if self.verbose:
                print(
                    f"\nSelecting optimal number of lags using {self.lags}..."
                )
                iterator = tqdm(range(1, max_lags + 1))
            else:
                iterator = range(1, max_lags + 1)

            for lag in iterator:
                # Convert DataFrame to numpy array before reversing
                if isinstance(X, pd.DataFrame):
                    X_values = X.values[::-1]
                else:
                    X_values = X[::-1]

                # Try current lag value
                if self.init_n_series_ > 1:
                    mts_input = ts.create_train_inputs(X_values, lag)
                else:
                    mts_input = ts.create_train_inputs(
                        X_values.reshape(-1, 1), lag
                    )

                # Cook training set and partial_fit model
                dummy_y, scaled_Z = self.cook_training_set(
                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
                )
                residuals_ = []

                # Fit each series (centered) and collect in-sample residuals
                for i in range(self.init_n_series_):
                    y_mean = np.mean(mts_input[0][:, i])
                    centered_y_i = mts_input[0][:, i] - y_mean
                    self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
                    residuals_.append(
                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                    )

                self.residuals_ = np.asarray(residuals_).T
                # self.lags still holds the criterion name at this point
                ic = self._compute_information_criterion(
                    curr_lags=lag, criterion=self.lags
                )

                if self.verbose:
                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

                if ic < best_ic:
                    best_ic = ic
                    best_lags = lag

            if self.verbose:
                print(
                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
                )

            # NOTE(review): self.lags changes type here (str -> int) — confirm
            # downstream code never expects the criterion string again.
            self.lags = best_lags

        self.input_dates = None
        self.df_ = None

        # --- normalize input into a DataFrame self.df_ and numpy X ---
        if isinstance(X, pd.DataFrame) is False:
            # input data set is a numpy array
            if xreg is None:
                X = pd.DataFrame(X)
                if len(X.shape) > 1:
                    self.series_names = [
                        "series" + str(i) for i in range(X.shape[1])
                    ]
                else:
                    self.series_names = ["series0"]
            else:
                # xreg is not None: append external regressors as columns
                X = mo.cbind(X, xreg)
                self.xreg_ = xreg

        else:  # input data set is a DataFrame with column names
            X_index = None
            if X.index is not None:
                X_index = X.index
            if xreg is None:
                X = copy.deepcopy(mo.convert_df_to_numeric(X))
            else:
                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
                self.xreg_ = xreg
            if X_index is not None:
                X.index = X_index
            self.series_names = X.columns.tolist()

        if isinstance(X, pd.DataFrame):
            if self.df_ is None:
                # first fit: keep the DataFrame, work on its values
                self.df_ = X
                X = X.values
            else:
                # refit: append new rows and extend the date index using the
                # frequency inferred from the previous index
                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
                frequency = pd.infer_freq(input_dates_prev)
                self.df_ = pd.concat([self.df_, X], axis=0)
                self.input_dates = pd.date_range(
                    start=input_dates_prev[0],
                    periods=len(input_dates_prev) + X.shape[0],
                    freq=frequency,
                ).values.tolist()
                self.df_.index = self.input_dates
                X = self.df_.values
            self.df_.columns = self.series_names
        else:
            if self.df_ is None:
                self.df_ = pd.DataFrame(X, columns=self.series_names)
            else:
                self.df_ = pd.concat(
                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
                    axis=0,
                )

        self.input_dates = ts.compute_input_dates(self.df_)

        try:
            # multivariate time series
            n, p = X.shape
        except:
            # univariate time series
            n = X.shape[0]
            p = 1
        self.n_obs_ = n

        rep_1_n = np.repeat(1, n)

        # reset all fitted state before (re)training
        self.y_ = None
        self.X_ = None
        self.n_series = p
        self.fit_objs_.clear()
        self.y_means_.clear()
        residuals_ = []
        self.residuals_ = None
        self.residuals_sims_ = None
        self.kde_ = None
        self.sims_ = None
        self.scaled_Z_ = None
        self.centered_y_is_ = []

        # responses/regressors are built on the time-reversed series
        if self.init_n_series_ > 1:
            # multivariate time series
            mts_input = ts.create_train_inputs(X[::-1], self.lags)
        else:
            # univariate time series
            mts_input = ts.create_train_inputs(
                X.reshape(-1, 1)[::-1], self.lags
            )

        self.y_ = mts_input[0]

        self.X_ = mts_input[1]

        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

        self.scaled_Z_ = scaled_Z

        # loop on all the time series and adjust self.obj.partial_fit
        if self.verbose > 0:
            print(
                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
            )

        if self.show_progress is True:
            iterator = tqdm(range(self.init_n_series_))
        else:
            iterator = range(self.init_n_series_)

        # one fitted copy of self.obj per series, trained on centered responses
        if self.type_pi in (
            "gaussian",
            "kde",
            "bootstrap",
            "block-bootstrap",
        ) or self.type_pi.startswith("vine"):
            for i in iterator:
                y_mean = np.mean(self.y_[:, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[:, i] - y_mean
                self.centered_y_is_.append(centered_y_i)
                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
                self.fit_objs_[i] = deepcopy(self.obj)
                residuals_.append(
                    (
                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
                    ).tolist()
                )

        if self.type_pi == "quantile":
            # quantile mode keeps no residuals here
            for i in iterator:
                y_mean = np.mean(self.y_[:, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[:, i] - y_mean
                self.centered_y_is_.append(centered_y_i)
                self.obj.partial_fit(X=scaled_Z, y=centered_y_i)
                self.fit_objs_[i] = deepcopy(self.obj)

        if self.type_pi.startswith("scp"):
            # split conformal prediction: fit on first half, calibrate
            # residuals on second half, then refit on second half
            for i in iterator:
                n_y = self.y_.shape[0]
                n_y_half = n_y // 2
                first_half_idx = range(0, n_y_half)
                second_half_idx = range(n_y_half, n_y)
                y_mean_temp = np.mean(self.y_[first_half_idx, i])
                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
                self.obj.partial_fit(
                    X=scaled_Z[first_half_idx, :], y=centered_y_i_temp
                )
                # calibrated residuals actually
                residuals_.append(
                    (
                        self.y_[second_half_idx, i]
                        - (
                            y_mean_temp
                            + self.obj.predict(scaled_Z[second_half_idx, :])
                        )
                    ).tolist()
                )
                # partial_fit on the second half
                y_mean = np.mean(self.y_[second_half_idx, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[second_half_idx, i] - y_mean
                self.obj.partial_fit(
                    X=scaled_Z[second_half_idx, :], y=centered_y_i
                )
                self.fit_objs_[i] = deepcopy(self.obj)

        self.residuals_ = np.asarray(residuals_).T

        if self.type_pi == "gaussian":
            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

        if self.type_pi.startswith("scp2"):
            # Calculate mean and standard deviation for each column
            data_mean = np.mean(self.residuals_, axis=0)
            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
            # Center and scale the array using broadcasting
            self.residuals_ = (
                self.residuals_ - data_mean[np.newaxis, :]
            ) / self.residuals_std_dev_[np.newaxis, :]

        # kde mode: fit a kernel density estimator to the residuals,
        # bandwidth chosen by grid search
        if self.replications != None and "kde" in self.type_pi:
            if self.verbose > 0:
                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
            assert self.kernel in (
                "gaussian",
                "tophat",
            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
            grid = GridSearchCV(
                KernelDensity(kernel=self.kernel, **kwargs),
                param_grid=kernel_bandwidths,
            )
            grid.fit(self.residuals_)

            if self.verbose > 0:
                print(
                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
                )

            self.kde_ = grid.best_estimator_

        return self

    def _predict_quantiles(self, h, quantiles, **kwargs):
        """Predict arbitrary quantiles from simulated paths.

        Runs a full `predict` (if needed) to populate `self.sims_`, then
        computes the requested quantiles across replications.
        """
        # Ensure output dates are set
        self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

        # Trigger full prediction to generate self.sims_
        if not hasattr(self, "sims_") or self.sims_ is None:
            _ = self.predict(h=h, level=95, **kwargs)  # Any level triggers sim

        result_dict = {}

        # Stack simulations: (R, h, n_series)
        sims_array = np.stack([sim.values for sim in self.sims_], axis=0)

        # Compute quantiles over replication axis
        q_values = np.quantile(
            sims_array, quantiles, axis=0
        )  # (n_q, h, n_series)

        for i, q in enumerate(quantiles):
            # Clean label: 0.05 → "05", 0.1 → "10", 0.95 → "95"
            q_label = (
                f"{int(q * 100):02d}"
                if (q * 100).is_integer()
                else f"{q:.3f}".replace(".", "_")
            )
            for series_id in range(self.init_n_series_):
                series_name = self.series_names[series_id]
                col_name = f"quantile_{q_label}_{series_name}"
                result_dict[col_name] = q_values[i, :, series_id]

        df_return_quantiles = pd.DataFrame(
            result_dict, index=self.output_dates_
        )

        return df_return_quantiles

    def predict(self, h=5, level=95, quantiles=None, **kwargs):
        """Forecast all the time series, h steps ahead.

        h: forecast horizon (number of steps).
        level: confidence level (or list/array of levels) for prediction
            intervals.
        quantiles: optional array of values in (0, 1); when given, the call
            is delegated to `_predict_quantiles`.
        Returns either a DataFrame of point forecasts, a flat DataFrame of
        lower/upper bounds (list of levels), or a DescribeResult namedtuple,
        depending on `type_pi` and kwargs.
        """

        if quantiles is not None:
            # Validate
            quantiles = np.asarray(quantiles)
            if not ((quantiles > 0) & (quantiles < 1)).all():
                raise ValueError("quantiles must be between 0 and 1.")
            # Delegate to dedicated method
            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

        if isinstance(level, list) or isinstance(level, np.ndarray):
            # Store results
            result_dict = {}
            # Loop through alphas and calculate lower/upper for each alpha level
            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
            for lev in level:
                # Get the forecast for this alpha
                res = self.predict(h=h, level=lev, **kwargs)
                # Adjust index and collect lower/upper bounds
                res.lower.index = pd.to_datetime(res.lower.index)
                res.upper.index = pd.to_datetime(res.upper.index)
                # Loop over each time series (multivariate) and flatten results
                if isinstance(res.lower, pd.DataFrame):
                    for (
                        series
                    ) in (
                        res.lower.columns
                    ):  # Assumes 'lower' and 'upper' have multiple series
                        result_dict[f"lower_{lev}_{series}"] = (
                            res.lower[series].to_numpy().flatten()
                        )
                        result_dict[f"upper_{lev}_{series}"] = (
                            res.upper[series].to_numpy().flatten()
                        )
                else:
                    for series_id in range(
                        self.n_series
                    ):  # Assumes 'lower' and 'upper' have multiple series
                        result_dict[f"lower_{lev}_{series_id}"] = (
                            res.lower[series_id, :].to_numpy().flatten()
                        )
                        result_dict[f"upper_{lev}_{series_id}"] = (
                            res.upper[series_id, :].to_numpy().flatten()
                        )
            return pd.DataFrame(result_dict, index=self.output_dates_)

        # only one prediction interval
        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)

        self.level_ = level

        self.return_std_ = False  # do not remove (/!\)

        self.mean_ = None  # do not remove (/!\)

        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

        self.lower_ = None  # do not remove (/!\)

        self.upper_ = None  # do not remove (/!\)

        self.sims_ = None  # do not remove (/!\)

        y_means_ = np.asarray(
            [self.y_means_[i] for i in range(self.init_n_series_)]
        )

        n_features = self.init_n_series_ * self.lags

        self.alpha_ = 100 - level

        # two-sided normal multiplier for the requested level
        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

        if "return_std" in kwargs:  # bayesian forecasting
            self.return_std_ = True
            self.preds_std_ = []
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )  # to be updated

        if "return_pi" in kwargs:  # split conformal, without simulation
            mean_pi_ = []
            lower_pi_ = []
            upper_pi_ = []
            median_pi_ = []
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )  # to be updated

        # --- simulate future residuals, depending on type_pi ---
        # NOTE(review): comparisons with `!= None` below should be `is not None`.
        if self.kde_ != None and "kde" in self.type_pi:  # kde
            target_cols = self.df_.columns[
                : self.init_n_series_
            ]  # Get target column names
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    self.kde_.sample(
                        n_samples=h, random_state=self.seed + 100 * i
                    )  # Keep full sample
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    self.kde_.sample(
                        n_samples=h, random_state=self.seed + 100 * i
                    )  # Keep full sample
                    for i in range(self.replications)
                )

            # Convert to DataFrames after sampling
            self.residuals_sims_ = tuple(
                pd.DataFrame(
                    sim,  # Keep all columns
                    columns=target_cols,  # Use original target column names
                    index=self.output_dates_,
                )
                for sim in self.residuals_sims_
            )

        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
            assert self.replications is not None and isinstance(
                self.replications, int
            ), "'replications' must be provided and be an integer"
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=None,
                        seed=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=None,
                        seed=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        if self.type_pi in (
            "block-bootstrap",
            "scp-block-bootstrap",
            "scp2-block-bootstrap",
        ):
            if self.block_size is None:
                # default block size grows as n^(1/3)
                self.block_size = int(
                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
                )

            assert self.replications is not None and isinstance(
                self.replications, int
            ), "'replications' must be provided and be an integer"
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=self.block_size,
                        seed=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=self.block_size,
                        seed=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        if "vine" in self.type_pi:
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    vinecopula_sample(
                        x=self.residuals_,
                        n_samples=h,
                        method=self.type_pi,
                        random_state=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    vinecopula_sample(
                        x=self.residuals_,
                        n_samples=h,
                        method=self.type_pi,
                        random_state=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        # --- recursive one-step-ahead forecasting loop ---
        mean_ = deepcopy(self.mean_)

        # NOTE(review): the loop index `i` is shadowed by the inner
        # `for i in range(self.n_series)` loop in the "return_pi" branch,
        # so the `kwargs["xreg"].iloc[i : i + 1]` below may not use the
        # horizon step index in that case — confirm intended behavior.
        for i in range(h):
            new_obs = ts.reformat_response(mean_, self.lags)
            new_X = new_obs.reshape(1, -1)
            cooked_new_X = self.cook_test_set(new_X, **kwargs)

            if "return_std" in kwargs:
                self.preds_std_.append(
                    [
                        np.asarray(
                            self.fit_objs_[i].predict(
                                cooked_new_X, return_std=True
                            )[1]
                        ).item()
                        for i in range(self.n_series)
                    ]
                )

            if "return_pi" in kwargs:
                for i in range(self.n_series):
                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
                    mean_pi_.append(preds_pi.mean[0])
                    lower_pi_.append(preds_pi.lower[0])
                    upper_pi_.append(preds_pi.upper[0])

            if self.type_pi != "quantile":
                predicted_cooked_new_X = np.asarray(
                    [
                        np.asarray(
                            self.fit_objs_[i].predict(cooked_new_X)
                        ).item()
                        for i in range(self.init_n_series_)
                    ]
                )
            else:
                predicted_cooked_new_X = np.asarray(
                    [
                        np.asarray(
                            self.fit_objs_[i]
                            .predict(cooked_new_X, return_pi=True)
                            .upper
                        ).item()
                        for i in range(self.init_n_series_)
                    ]
                )

            # add back the per-series means removed at fit time
            preds = np.asarray(y_means_ + predicted_cooked_new_X)

            # Create full row with both predictions and external regressors
            if self.xreg_ is not None and "xreg" in kwargs:
                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
                full_row = np.concatenate([preds, next_xreg])
            else:
                full_row = preds

            # Create a new row with same number of columns as mean_
            new_row = np.zeros((1, mean_.shape[1]))
            new_row[0, : full_row.shape[0]] = full_row

            # Maintain the full dimensionality by using vstack instead of rbind
            mean_ = np.vstack([new_row, mean_[:-1]])

        # Final output should only include the target columns
        self.mean_ = pd.DataFrame(
            mean_[0 : min(h, self.n_obs_ - self.lags), : self.init_n_series_][
                ::-1
            ],
            columns=self.df_.columns[: self.init_n_series_],
            index=self.output_dates_,
        )

        # function's return ----------------------------------------------------------------------
        if (
            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
            and (self.type_pi not in ("gaussian", "scp"))
        ) or ("vine" in self.type_pi):
            if self.replications is None:
                return self.mean_.iloc[:, : self.init_n_series_]

            # if "return_std" not in kwargs and self.replications is not None
            meanf = []
            medianf = []
            lower = []
            upper = []

            # build simulated paths = point forecast + simulated residuals
            if "scp2" in self.type_pi:
                if self.verbose == 1:
                    self.sims_ = tuple(
                        (
                            self.mean_
                            + self.residuals_sims_[i]
                            * self.residuals_std_dev_[np.newaxis, :]
                            for i in tqdm(range(self.replications))
                        )
                    )
                elif self.verbose == 0:
                    self.sims_ = tuple(
                        (
                            self.mean_
                            + self.residuals_sims_[i]
                            * self.residuals_std_dev_[np.newaxis, :]
                            for i in range(self.replications)
                        )
                    )
            else:
                if self.verbose == 1:
                    self.sims_ = tuple(
                        (
                            self.mean_ + self.residuals_sims_[i]
                            for i in tqdm(range(self.replications))
                        )
                    )
                elif self.verbose == 0:
                    self.sims_ = tuple(
                        (
                            self.mean_ + self.residuals_sims_[i]
                            for i in range(self.replications)
                        )
                    )

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )
            # per-series aggregation over replications
            for ix in range(self.init_n_series_):
                sims_ix = getsims(self.sims_, ix)
                if self.agg == "mean":
                    meanf.append(np.mean(sims_ix, axis=1))
                else:
                    medianf.append(np.median(sims_ix, axis=1))
                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
                upper.append(
                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
                )
            self.mean_ = pd.DataFrame(
                np.asarray(meanf).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            self.lower_ = pd.DataFrame(
                np.asarray(lower).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            try:
                self.median_ = pd.DataFrame(
                    np.asarray(medianf).T,
                    columns=self.series_names[
                        : self.init_n_series_
                    ],  # self.df_.columns,
                    index=self.output_dates_,
                )
            except Exception as e:
                # median only exists when agg != "mean"; ignore otherwise
                pass

            return DescribeResult(
                self.mean_, self.sims_, self.lower_, self.upper_
            )

        if (
            (("return_std" in kwargs) or ("return_pi" in kwargs))
            and (self.type_pi not in ("gaussian", "scp"))
        ) or "vine" in self.type_pi:
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            if "return_std" in kwargs:
                self.preds_std_ = np.asarray(self.preds_std_)

                self.lower_ = pd.DataFrame(
                    self.mean_.values - pi_multiplier * self.preds_std_,
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

                self.upper_ = pd.DataFrame(
                    self.mean_.values + pi_multiplier * self.preds_std_,
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

            if "return_pi" in kwargs:
                self.lower_ = pd.DataFrame(
                    np.asarray(lower_pi_).reshape(h, self.n_series)
                    + y_means_[np.newaxis, :],
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

                self.upper_ = pd.DataFrame(
                    np.asarray(upper_pi_).reshape(h, self.n_series)
                    + y_means_[np.newaxis, :],
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

            res = DescribeResult(self.mean_, self.lower_, self.upper_)

            if self.xreg_ is not None:
                # strip external-regressor columns from the returned frames
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0], res2[1], res2[2])

            return res

        if self.type_pi == "gaussian":
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            # Use Bayesian std if available, otherwise use gaussian residual std
            if "return_std" in kwargs and len(self.preds_std_) > 0:
                preds_std_to_use = np.asarray(self.preds_std_)
            else:
                preds_std_to_use = self.gaussian_preds_std_

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * preds_std_to_use,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * preds_std_to_use,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            res = DescribeResult(self.mean_, self.lower_, self.upper_)

            if self.xreg_ is not None:
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0], res2[1], res2[2])

            return res

        if self.type_pi == "quantile":
            DescribeResult = namedtuple("DescribeResult", ("mean"))

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            res = DescribeResult(self.mean_)

            if self.xreg_ is not None:
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0])

            return res

        # After prediction loop, ensure sims only contain target columns
        if self.sims_ is not None:
            if self.verbose == 1:
                self.sims_ = tuple(
                    sim[:h,]  # Only keep target columns and h rows
                    for sim in tqdm(self.sims_)
                )
            elif self.verbose == 0:
                self.sims_ = tuple(
                    sim[:h,]  # Only keep target columns and h rows
                    for sim in self.sims_
                )

            # Convert numpy arrays to DataFrames with proper columns
            self.sims_ = tuple(
                pd.DataFrame(
                    sim,
                    columns=self.df_.columns[: self.init_n_series_],
                    index=self.output_dates_,
                )
                for sim in self.sims_
            )

        if self.type_pi in (
            "kde",
            "bootstrap",
            "block-bootstrap",
            "vine-copula",
        ):
            if self.xreg_ is not None:
                # Use getsimsxreg when external regressors are present
                target_cols = self.df_.columns[: self.init_n_series_]
                self.sims_ = getsimsxreg(
                    self.sims_, self.output_dates_, target_cols
                )
            else:
                # Use original getsims for backward compatibility
                self.sims_ = getsims(self.sims_)

    def _crps_ensemble(self, y_true, simulations, axis=0):
        """
        Compute the Continuous Ranked Probability Score (CRPS) for an ensemble of simulations.

        The CRPS is a measure of the distance between the cumulative distribution
        function (CDF) of a forecast and the CDF of the observed value. This method
        computes the CRPS in a vectorized form for an ensemble of simulations, efficiently
        handling the case where there is only one simulation.

        Parameters
        ----------
        y_true : array_like, shape (n,)
            A 1D array of true values (observations).
            Each element represents the true value for a given sample.

        simulations : array_like, shape (n, R)
            A 2D array of simulated values. Each row corresponds to a different sample
            and each column corresponds to a different simulation of that sample.

        axis : int, optional, default=0
            Axis along which to transpose the simulations if needed.
            If axis=0, the simulations are transposed to shape (R, n).

        Returns
        -------
        crps : ndarray, shape (n,)
            A 1D array of CRPS scores, one for each sample.

        Notes
        -----
        The CRPS score is computed as:

        CRPS(y_true, simulations) = E[|X - y|] - 0.5 * E[|X - X'|]

        Where:
        - `X` is the ensemble of simulations.
        - `y` is the true value.
        - `X'` is a second independent sample from the ensemble.

        The calculation is vectorized to optimize performance for large datasets.

        The edge case where `R=1` (only one simulation) is handled by returning
        only `term1` (i.e., no ensemble spread).
        """
        sims = np.asarray(simulations)  # Convert simulations to numpy array
        if axis == 0:
            sims = sims.T  # Transpose if the axis is 0
        n, R = sims.shape  # n = number of samples, R = number of simulations
        # Term 1: E|X - y|, average absolute difference between simulations and true value
        term1 = np.mean(np.abs(sims - y_true[:, np.newaxis]), axis=1)
        # Handle edge case: if R == 1, return term1 (no spread in ensemble)
        if R == 1:
            return term1
        # Term 2: 0.5 * E|X - X'|, using efficient sorted formula
        sims_sorted = np.sort(sims, axis=1)  # Sort simulations along each row
        # Correct coefficients for efficient calculation
        j = np.arange(R)  # 0-indexed positions in the sorted simulations
        coefficients = (2 * (j + 1) - R - 1) / (
            R * (R - 1)
        )  # Efficient coefficient calculation
        # Dot product along the second axis (over the simulations)
        term2 = np.dot(sims_sorted, coefficients)
        # Return CRPS score: term1 - 0.5 * term2
        return term1 - 0.5 * term2

    def score(
        self,
        X,
        training_index,
        testing_index,
        scoring=None,
        alpha=0.5,
        **kwargs,
    ):
        """Train on training_index, score on testing_index.

        X: full data set (array or DataFrame); training/testing index sets
        must not overlap. `scoring` defaults to
        'neg_root_mean_squared_error'; also supports 'pinball' (quantile
        level `alpha`) and 'crps' (simulation-based).
        """

        assert (
            bool(set(training_index).intersection(set(testing_index))) == False
        ), "Non-overlapping 'training_index' and 'testing_index' required"

        # Dimensions
        try:
            # multivariate time series
            n, p = X.shape
        except:
            # univariate time series
            n = X.shape[0]
            p = 1

        # Training and testing sets
        if p > 1:
            X_train = X[training_index, :]
            X_test = X[testing_index, :]
        else:
            X_train = X[training_index]
            X_test = X[testing_index]

        # Horizon
        h = len(testing_index)
        assert (
            len(training_index) + h
        ) <= n, "Please check lengths of training and testing windows"

        # Fit and predict
        self.fit(X_train, **kwargs)
        preds = self.predict(h=h, **kwargs)

        if scoring is None:
            scoring = "neg_root_mean_squared_error"

        if scoring == "pinball":
            # Predict requested quantile
            q_pred = self.predict(h=h, quantiles=[alpha], **kwargs)
            # Handle multivariate
            scores = []
            for j in range(p):
                series_name = getattr(self, "series_names", [f"Series_{j}"])[j]
                # label must match the one produced by _predict_quantiles
                q_label = (
                    f"{int(alpha * 100):02d}"
                    if (alpha * 100).is_integer()
                    else f"{alpha:.3f}".replace(".", "_")
                )
                col = f"quantile_{q_label}_{series_name}"
                if col not in q_pred.columns:
                    raise ValueError(
                        f"Column '{col}' not found in quantile forecast output."
                    )
                # NOTE(review): X_test[:, j] assumes p > 1 here — for a
                # univariate X_test this indexing would fail; confirm.
                y_true_j = X_test[:, j]
                y_pred_j = q_pred[col].values
                # Compute pinball loss for this series
                loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha)
                scores.append(loss)
            # Return average over series
            return np.mean(scores)

        if scoring == "crps":
            # Ensure simulations exist
            preds = self.predict(h=h, **kwargs)  # triggers self.sims_
            # Extract simulations: list of DataFrames → (R, h, p)
            sims_vals = np.stack(
                [sim.values for sim in self.sims_], axis=0
            )  # (R, h, p)
            crps_scores = []
            for j in range(p):
                y_true_j = X_test[:, j]
                sims_j = sims_vals[:, :, j]  # (R, h)
                crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j)
                crps_scores.append(np.mean(crps_j))  # average over horizon
            return np.mean(crps_scores)  # average over series

        # check inputs
        assert scoring in (
            "explained_variance",
            "neg_mean_absolute_error",
            "neg_mean_squared_error",
            "neg_root_mean_squared_error",
            "neg_mean_squared_log_error",
            "neg_median_absolute_error",
            "r2",
        ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \
                'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \
                'neg_median_absolute_error', 'r2')"

        scoring_options = {
            "explained_variance": skm2.explained_variance_score,
            "neg_mean_absolute_error": skm2.mean_absolute_error,
            "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2),
            "neg_root_mean_squared_error": lambda x, y: np.sqrt(
                np.mean((x - y) ** 2)
            ),
            "neg_mean_squared_log_error": skm2.mean_squared_log_error,
            "neg_median_absolute_error": skm2.median_absolute_error,
            "r2": skm2.r2_score,
        }

        return scoring_options[scoring](X_test, preds)

    def plot(self, series=None, type_axis="dates", type_plot="pi"):
        """Plot time series forecast

        Parameters:

        series: {integer} or {string}
            series index or name

        """

        assert all(
            [
                self.mean_ is not None,
                self.lower_ is not None,
                self.upper_ is not None,
                self.output_dates_ is not None,
            ]
        ), "model forecasting must be obtained first (with predict)"

        if series is None:
            # assert (
            #     self.init_n_series_ == 1
            # ), "please specify series index or name (n_series > 1)"
            series = 0

        if isinstance(series, str):
            assert (
                series in self.series_names
            ), f"series {series} doesn't exist in the input dataset"
            series_idx = self.df_.columns.get_loc(series)
        else:
            assert isinstance(series, int) and (
                0 <= series < self.n_series
            ), f"check series index (< {self.n_series})"
            series_idx = series

        # observed history followed by forecast, for the chosen series
        y_all = list(self.df_.iloc[:, series_idx]) + list(
            self.mean_.iloc[:, series_idx]
        )
        y_test = list(self.mean_.iloc[:, series_idx])
        n_points_all = len(y_all)
        n_points_train = self.df_.shape[0]

        if type_axis == "numeric":
            x_all = [i for i in range(n_points_all)]
            x_test = [i for i in range(n_points_train, n_points_all)]

        if type_axis == "dates":  # use dates
            x_all = np.concatenate(
                (self.input_dates.values, self.output_dates_.values), axis=None
            )
            x_test = self.output_dates_.values

        if type_plot == "pi":
            # point forecast with shaded prediction interval
            fig, ax = plt.subplots()
            ax.plot(x_all, y_all, "-")
            ax.plot(x_test, y_test, "-", color="orange")
            ax.fill_between(
                x_test,
                self.lower_.iloc[:, series_idx],
                self.upper_.iloc[:, series_idx],
                alpha=0.2,
                color="orange",
            )
            if self.replications is None:
                if self.n_series > 1:
                    plt.title(
                        f"prediction intervals for {series}",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                else:
                    plt.title(
                        f"prediction intervals for input time series",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                plt.show()
            else:  # self.replications is not None
                if self.n_series > 1:
                    plt.title(
                        f"prediction intervals for {self.replications} simulations of {series}",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                else:
                    plt.title(
                        f"prediction intervals for {self.replications} simulations of input time series",
                        loc="left",
                        fontsize=12,
                        fontweight=0,
                        color="black",
                    )
                plt.show()

        if type_plot == "spaghetti":
            # one line per simulated path
            palette = plt.get_cmap("Set1")
            sims_ix = getsims(self.sims_, series_idx)
            plt.plot(x_all, y_all, "-")
            for col_ix in range(
                sims_ix.shape[1]
            ):  # avoid this when there are thousands of simulations
                plt.plot(
                    x_test,
                    sims_ix[:, col_ix],
                    "-",
                    color=palette(col_ix),
                    linewidth=1,
                    alpha=0.9,
                )
            plt.plot(x_all, y_all, "-", color="black")
            plt.plot(x_test, y_test, "-", color="blue")
            # Add titles
            if self.n_series > 1:
                plt.title(
                    f"{self.replications} simulations of {series}",
                    loc="left",
                    fontsize=12,
                    fontweight=0,
                    color="black",
                )
            else:
                plt.title(
                    f"{self.replications} simulations of input time series",
                    loc="left",
                    fontsize=12,
                    fontweight=0,
                    color="black",
                )
            plt.xlabel("Time")
            plt.ylabel("Values")
            # Show the graph
            plt.show()

    def cross_val_score(
        self,
        X,
        scoring="root_mean_squared_error",
        n_jobs=None,
        verbose=0,
        xreg=None,
        initial_window=5,
        horizon=3,
        fixed_window=False,
        show_progress=True,
        level=95,
        alpha=0.5,
        **kwargs,
    ):
        """Evaluate a score by time series cross-validation.

        Parameters:

        X: {array-like, sparse matrix} of shape (n_samples, n_features)
            The data to fit.

        scoring: str or a function
            A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error',
            'mean_absolute_error', 'mean_error', 'mean_percentage_error',
            'mean_absolute_percentage_error', 'winkler_score', 'coverage')
            Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries`

        n_jobs: int, default=None
            Number of jobs to run in parallel.

        verbose: int, default=0
            The verbosity level.

        xreg: array-like, optional (default=None)
            Additional (external) regressors to be passed to `fit`
            xreg must be in 'increasing' order (most recent observations last)

        initial_window: int
            initial number of consecutive values in each training set sample

        horizon: int
            number of consecutive values in test set sample

        fixed_window: boolean
            if False, all training samples start at index 0, and the training
            window's size is increasing.
            if True, the training window's size is fixed, and the window is
            rolling forward

        show_progress: boolean
            if True, a progress bar is printed

        level: int
            confidence level for prediction intervals

        alpha: float
            quantile level for pinball loss if scoring='pinball'
            0 < alpha < 1

        **kwargs: dict
            additional parameters to be passed to `fit` and `predict`

        Returns:

        A tuple: descriptive statistics or errors and raw errors

        """
        tscv = TimeSeriesSplit()

        tscv_obj = tscv.split(
            X,
            initial_window=initial_window,
            horizon=horizon,
            fixed_window=fixed_window,
        )

        if isinstance(scoring, str):
            assert scoring in (
                "pinball",
                "crps",
                "root_mean_squared_error",
                "mean_squared_error",
                "mean_error",
                "mean_absolute_error",
                "mean_percentage_error",
                "mean_absolute_percentage_error",
                "winkler_score",
                "coverage",
            ), "must have scoring in ('pinball', 'crps', 'root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')"

            # per-fold error function; closes over self / kwargs / level
            def err_func(X_test, X_pred, scoring, alpha=0.5):
                if (self.replications is not None) or (
                    self.type_pi == "gaussian"
                ):  # probabilistic
                    if scoring == "pinball":
                        # Predict requested quantile
                        q_pred = self.predict(
                            h=len(X_test), quantiles=[alpha], **kwargs
                        )
                        # Handle multivariate
                        scores = []
                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
                        for j in range(p):
                            series_name = getattr(
                                self, "series_names", [f"Series_{j}"]
                            )[j]
                            # label must match _predict_quantiles output
                            q_label = (
                                f"{int(alpha * 100):02d}"
                                if (alpha * 100).is_integer()
                                else f"{alpha:.3f}".replace(".", "_")
                            )
                            col = f"quantile_{q_label}_{series_name}"
                            if col not in q_pred.columns:
                                raise ValueError(
                                    f"Column '{col}' not found in quantile forecast output."
                                )
                            # numpy indexing first, DataFrame fallback
                            try:
                                y_true_j = X_test[:, j] if p > 1 else X_test
                            except:
                                y_true_j = (
                                    X_test.iloc[:, j]
                                    if p > 1
                                    else X_test.values
                                )
                            y_pred_j = q_pred[col].values
                            # Compute pinball loss for this series
                            loss = mean_pinball_loss(
                                y_true_j, y_pred_j, alpha=alpha
                            )
                            scores.append(loss)
                        # Return average over series
                        return np.mean(scores)
                    elif scoring == "crps":
                        # Ensure simulations exist
                        _ = self.predict(
                            h=len(X_test), **kwargs
                        )  # triggers self.sims_
                        # Extract simulations: list of DataFrames → (R, h, p)
                        sims_vals = np.stack(
                            [sim.values for sim in self.sims_], axis=0
                        )  # (R, h, p)
                        crps_scores = []
                        p = X_test.shape[1] if len(X_test.shape) > 1 else 1
                        for j in range(p):
                            try:
                                y_true_j = X_test[:, j] if p > 1 else X_test
                            except Exception as e:
                                y_true_j = (
                                    X_test.iloc[:, j]
                                    if p > 1
                                    else X_test.values
                                )
                            sims_j = sims_vals[:, :, j]  # (R, h)
                            crps_j = self._crps_ensemble(
                                np.asarray(y_true_j), sims_j
                            )
                            crps_scores.append(
                                np.mean(crps_j)
                            )  # average over horizon
                        return np.mean(crps_scores)  # average over series
                    if scoring == "winkler_score":
                        return winkler_score(X_pred, X_test, level=level)
                    elif scoring == "coverage":
                        return coverage(X_pred, X_test, level=level)
                    else:
                        return mean_errors(
                            pred=X_pred.mean, actual=X_test, scoring=scoring
                        )
                else:  # not probabilistic
                    return mean_errors(
                        pred=X_pred, actual=X_test, scoring=scoring
                    )

        else:  # isinstance(scoring, str) = False
            err_func = scoring

        errors = []

        train_indices = []

        test_indices = []

        for train_index, test_index in tscv_obj:
            train_indices.append(train_index)
            test_indices.append(test_index)

        if show_progress is True:
            iterator = tqdm(
                zip(train_indices, test_indices), total=len(train_indices)
            )
        else:
            iterator = zip(train_indices, test_indices)

        # refit on each training window, score the following horizon
        for train_index, test_index in iterator:
            if verbose == 1:
                print(f"TRAIN: {train_index}")
                print(f"TEST: {test_index}")

            if isinstance(X, pd.DataFrame):
                self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs)
                X_test = X.iloc[test_index, :]
            else:
                self.fit(X[train_index, :], xreg=xreg, **kwargs)
                X_test = X[test_index, :]
            X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs)

            errors.append(err_func(X_test, X_pred, scoring, alpha=alpha))

        res = np.asarray(errors)

        return res, describe(res)

    def _compute_information_criterion(self, curr_lags, criterion="AIC"):
        """Compute information criterion using existing residuals

        Parameters
        ----------
        curr_lags : int
            Current number of lags being evaluated
        criterion : str
            One of 'AIC', 'AICc', or 'BIC'

        Returns
        -------
        float
            Information criterion value or inf if parameters exceed observations
        """
        # Get dimensions
        n_obs = self.residuals_.shape[0]
        n_features = int(self.init_n_series_ * curr_lags)
        n_hidden = int(self.n_hidden_features)
        # Calculate number of parameters
        term1 = int(n_features * n_hidden)
        term2 = int(n_hidden * self.init_n_series_)
        n_params = term1 + term2
        # Check if we have enough observations for the number of parameters
        if n_obs <= n_params + 1:
            return float("inf")  # Return infinity if too many parameters
        # Compute RSS using existing residuals
        rss = np.sum(self.residuals_**2)
        # Compute criterion
        if criterion == "AIC":
            ic = n_obs * np.log(rss / n_obs) + 2 * n_params
        elif criterion == "AICc":
            ic = n_obs * np.log(rss / n_obs) + 2 * n_params * (
                n_obs / (n_obs - n_params - 1)
            )
        else:  # BIC
            ic = n_obs *
np.log(rss / n_obs) + n_params * np.log(n_obs) 2068 2069 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "quantile": use model-agnostic quantile regression under the hood
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
level: int.
level of confidence for `type_pi == 'quantile'` (default is `95`)
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is None.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
    def fit(self, X, xreg=None, **kwargs):
        """Fit MTS model to training data X, with optional regressors xreg

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training time series, where n_samples is the number
                of samples and n_features is the number of features;
                X must be in increasing order (most recent observations last)

            xreg: {array-like}, shape = [n_samples, n_features_xreg]
                Additional (external) regressors to be passed to self.obj
                xreg must be in 'increasing' order (most recent observations last)

            **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)

        Returns:

            self: object
        """
        # number of target series (1 for a 1-D input)
        try:
            self.init_n_series_ = X.shape[1]
        except IndexError as e:
            self.init_n_series_ = 1

        # Automatic lag selection if requested
        # (self.lags is then the criterion name: 'AIC', 'AICc' or 'BIC')
        if isinstance(self.lags, str):
            max_lags = min(25, X.shape[0] // 4)
            best_ic = float("inf")
            best_lags = 1

            if self.verbose:
                print(
                    f"\nSelecting optimal number of lags using {self.lags}..."
                )
                iterator = tqdm(range(1, max_lags + 1))
            else:
                iterator = range(1, max_lags + 1)

            for lag in iterator:
                # Convert DataFrame to numpy array before reversing
                if isinstance(X, pd.DataFrame):
                    X_values = X.values[::-1]
                else:
                    X_values = X[::-1]

                # Try current lag value
                if self.init_n_series_ > 1:
                    mts_input = ts.create_train_inputs(X_values, lag)
                else:
                    mts_input = ts.create_train_inputs(
                        X_values.reshape(-1, 1), lag
                    )

                # Cook training set and fit model
                dummy_y, scaled_Z = self.cook_training_set(
                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
                )
                residuals_ = []

                # fit one centered model per series; collect in-sample residuals
                for i in range(self.init_n_series_):
                    y_mean = np.mean(mts_input[0][:, i])
                    centered_y_i = mts_input[0][:, i] - y_mean
                    self.obj.fit(X=scaled_Z, y=centered_y_i)
                    residuals_.append(
                        (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                    )

                self.residuals_ = np.asarray(residuals_).T
                ic = self._compute_information_criterion(
                    curr_lags=lag, criterion=self.lags
                )

                if self.verbose:
                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

                if ic < best_ic:
                    best_ic = ic
                    best_lags = lag

            if self.verbose:
                print(
                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
                )

            # NOTE(review): this overwrites the criterion string, so a second
            # fit() on the same instance will reuse the selected integer lags
            # instead of re-running the selection — confirm this is intended.
            self.lags = best_lags

        self.input_dates = None
        self.df_ = None

        if isinstance(X, pd.DataFrame) is False:
            # input data set is a numpy array
            if xreg is None:
                X = pd.DataFrame(X)
                self.series_names = [
                    "series" + str(i) for i in range(X.shape[1])
                ]
            else:
                # xreg is not None
                # NOTE(review): on this path self.series_names is not set here;
                # presumably set elsewhere — verify before relying on it below.
                X = mo.cbind(X, xreg)
                self.xreg_ = xreg

        else:  # input data set is a DataFrame with column names
            X_index = None
            if X.index is not None:
                X_index = X.index
            if xreg is None:
                X = copy.deepcopy(mo.convert_df_to_numeric(X))
            else:
                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
                self.xreg_ = xreg
            if X_index is not None:
                X.index = X_index
            self.series_names = X.columns.tolist()

        if isinstance(X, pd.DataFrame):
            if self.df_ is None:
                self.df_ = X
                X = X.values
            else:
                # append new observations to the stored frame, extending the
                # date index with the inferred frequency
                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
                frequency = pd.infer_freq(input_dates_prev)
                self.df_ = pd.concat([self.df_, X], axis=0)
                self.input_dates = pd.date_range(
                    start=input_dates_prev[0],
                    periods=len(input_dates_prev) + X.shape[0],
                    freq=frequency,
                ).values.tolist()
                self.df_.index = self.input_dates
                X = self.df_.values
            self.df_.columns = self.series_names
        else:
            if self.df_ is None:
                self.df_ = pd.DataFrame(X, columns=self.series_names)
            else:
                self.df_ = pd.concat(
                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
                    axis=0,
                )

        self.input_dates = ts.compute_input_dates(self.df_)

        try:
            # multivariate time series
            n, p = X.shape
        except:
            # univariate time series
            n = X.shape[0]
            p = 1
        self.n_obs_ = n

        rep_1_n = np.repeat(1, n)

        # reset all per-fit state before (re)training
        self.y_ = None
        self.X_ = None
        self.n_series = p
        self.fit_objs_.clear()
        self.y_means_.clear()
        residuals_ = []
        self.residuals_ = None
        self.residuals_sims_ = None
        self.kde_ = None
        self.sims_ = None
        self.scaled_Z_ = None
        self.centered_y_is_ = []

        if self.init_n_series_ > 1:
            # multivariate time series
            mts_input = ts.create_train_inputs(X[::-1], self.lags)
        else:
            # univariate time series
            mts_input = ts.create_train_inputs(
                X.reshape(-1, 1)[::-1], self.lags
            )

        # responses (most recent observations first) and lagged design matrix
        self.y_ = mts_input[0]

        self.X_ = mts_input[1]

        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

        self.scaled_Z_ = scaled_Z

        # loop on all the time series and adjust self.obj.fit
        if self.verbose > 0:
            print(
                f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
            )

        if self.show_progress is True:
            iterator = tqdm(range(self.init_n_series_))
        else:
            iterator = range(self.init_n_series_)

        # non-conformal simulation-based intervals: fit on the full sample,
        # keep in-sample residuals
        if self.type_pi in (
            "gaussian",
            "kde",
            "bootstrap",
            "block-bootstrap",
        ) or self.type_pi.startswith("vine"):
            for i in iterator:
                y_mean = np.mean(self.y_[:, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[:, i] - y_mean
                self.centered_y_is_.append(centered_y_i)
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                self.fit_objs_[i] = deepcopy(self.obj)
                residuals_.append(
                    (
                        centered_y_i - self.fit_objs_[i].predict(scaled_Z)
                    ).tolist()
                )

        # quantile intervals: fit only, no residuals needed here
        if self.type_pi == "quantile":
            for i in iterator:
                y_mean = np.mean(self.y_[:, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[:, i] - y_mean
                self.centered_y_is_.append(centered_y_i)
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                self.fit_objs_[i] = deepcopy(self.obj)

        if self.type_pi.startswith("scp"):
            # split conformal prediction
            for i in iterator:
                n_y = self.y_.shape[0]
                n_y_half = n_y // 2
                first_half_idx = range(0, n_y_half)
                second_half_idx = range(n_y_half, n_y)
                # fit on first half, calibrate on second half
                y_mean_temp = np.mean(self.y_[first_half_idx, i])
                centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
                self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
                # calibrated residuals actually
                residuals_.append(
                    (
                        self.y_[second_half_idx, i]
                        - (
                            y_mean_temp
                            + self.obj.predict(scaled_Z[second_half_idx, :])
                        )
                    ).tolist()
                )
                # fit on the second half
                y_mean = np.mean(self.y_[second_half_idx, i])
                self.y_means_[i] = y_mean
                centered_y_i = self.y_[second_half_idx, i] - y_mean
                self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
                self.fit_objs_[i] = deepcopy(self.obj)

        # residuals matrix: one column per series
        self.residuals_ = np.asarray(residuals_).T

        if self.type_pi == "gaussian":
            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

        if self.type_pi.startswith("scp2"):
            # Calculate mean and standard deviation for each column
            data_mean = np.mean(self.residuals_, axis=0)
            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
            # Center and scale the array using broadcasting
            self.residuals_ = (
                self.residuals_ - data_mean[np.newaxis, :]
            ) / self.residuals_std_dev_[np.newaxis, :]

        if self.replications != None and "kde" in self.type_pi:
            if self.verbose > 0:
                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
            assert self.kernel in (
                "gaussian",
                "tophat",
            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
            # bandwidth chosen by (default 5-fold) cross-validated grid search
            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
            grid = GridSearchCV(
                KernelDensity(kernel=self.kernel, **kwargs),
                param_grid=kernel_bandwidths,
            )
            grid.fit(self.residuals_)

            if self.verbose > 0:
                print(
                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
                )

            self.kde_ = grid.best_estimator_

        return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
    def predict(self, h=5, level=95, quantiles=None, **kwargs):
        """Forecast all the time series, h steps ahead

        Parameters:

            h: int
                forecasting horizon (number of steps ahead)

            level: int, or list/array of ints
                confidence level(s) for prediction intervals; a list/array
                returns a flat DataFrame of lower/upper bounds per level

            quantiles: list of floats in (0, 1), optional
                if provided, delegates to `_predict_quantiles` and returns
                quantile forecasts instead of intervals

            **kwargs: dict
                additional parameters (e.g. `return_std`, `return_pi`, `xreg`)
                passed to the base learners' `predict`

        Returns: depends on `type_pi` and kwargs — a DataFrame of mean
            forecasts, or a DescribeResult namedtuple with mean/lower/upper
            (and sims when `replications` is set)
        """

        if quantiles is not None:
            # Validate
            quantiles = np.asarray(quantiles)
            if not ((quantiles > 0) & (quantiles < 1)).all():
                raise ValueError("quantiles must be between 0 and 1.")
            # Delegate to dedicated method
            return self._predict_quantiles(h=h, quantiles=quantiles, **kwargs)

        if isinstance(level, list) or isinstance(level, np.ndarray):
            # Store results
            result_dict = {}
            # Loop through alphas and calculate lower/upper for each alpha level
            # E.g [0.5, 2.5, 5, 16.5, 25, 50]
            for lev in level:
                # Get the forecast for this alpha (recursive call, scalar level)
                res = self.predict(h=h, level=lev, **kwargs)
                # Adjust index and collect lower/upper bounds
                res.lower.index = pd.to_datetime(res.lower.index)
                res.upper.index = pd.to_datetime(res.upper.index)
                # Loop over each time series (multivariate) and flatten results
                if isinstance(res.lower, pd.DataFrame):
                    for (
                        series
                    ) in (
                        res.lower.columns
                    ):  # Assumes 'lower' and 'upper' have multiple series
                        result_dict[f"lower_{lev}_{series}"] = (
                            res.lower[series].to_numpy().flatten()
                        )
                        result_dict[f"upper_{lev}_{series}"] = (
                            res.upper[series].to_numpy().flatten()
                        )
                else:
                    for series_id in range(
                        self.n_series
                    ):  # Assumes 'lower' and 'upper' have multiple series
                        result_dict[f"lower_{lev}_{series_id}"] = (
                            res.lower[series_id, :].to_numpy().flatten()
                        )
                        result_dict[f"upper_{lev}_{series_id}"] = (
                            res.upper[series_id, :].to_numpy().flatten()
                        )
            return pd.DataFrame(result_dict, index=self.output_dates_)

        # only one prediction interval
        self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)

        self.level_ = level

        self.return_std_ = False  # do not remove (/!\)

        self.mean_ = None  # do not remove (/!\)

        self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

        self.lower_ = None  # do not remove (/!\)

        self.upper_ = None  # do not remove (/!\)

        self.sims_ = None  # do not remove (/!\)

        # per-series means subtracted at fit time; added back to predictions
        y_means_ = np.asarray(
            [self.y_means_[i] for i in range(self.init_n_series_)]
        )

        n_features = self.init_n_series_ * self.lags

        self.alpha_ = 100 - level

        # two-sided Gaussian multiplier for the requested level
        pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

        if "return_std" in kwargs:  # bayesian forecasting
            self.return_std_ = True
            self.preds_std_ = []
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )  # to be updated

        if "return_pi" in kwargs:  # split conformal, without simulation
            mean_pi_ = []
            lower_pi_ = []
            upper_pi_ = []
            median_pi_ = []
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )  # to be updated

        if self.kde_ != None and "kde" in self.type_pi:  # kde
            target_cols = self.df_.columns[
                : self.init_n_series_
            ]  # Get target column names
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    self.kde_.sample(
                        n_samples=h, random_state=self.seed + 100 * i
                    )  # Keep full sample
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    self.kde_.sample(
                        n_samples=h, random_state=self.seed + 100 * i
                    )  # Keep full sample
                    for i in range(self.replications)
                )

            # Convert to DataFrames after sampling
            self.residuals_sims_ = tuple(
                pd.DataFrame(
                    sim,  # Keep all columns
                    columns=target_cols,  # Use original target column names
                    index=self.output_dates_,
                )
                for sim in self.residuals_sims_
            )

        if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
            assert self.replications is not None and isinstance(
                self.replications, int
            ), "'replications' must be provided and be an integer"
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=None,
                        seed=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=None,
                        seed=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        if self.type_pi in (
            "block-bootstrap",
            "scp-block-bootstrap",
            "scp2-block-bootstrap",
        ):
            if self.block_size is None:
                # default block size: ceil(3.15 * n_residuals^(1/3))
                self.block_size = int(
                    np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
                )

            assert self.replications is not None and isinstance(
                self.replications, int
            ), "'replications' must be provided and be an integer"
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=self.block_size,
                        seed=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    ts.bootstrap(
                        self.residuals_,
                        h=h,
                        block_size=self.block_size,
                        seed=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        if "vine" in self.type_pi:
            if self.verbose == 1:
                self.residuals_sims_ = tuple(
                    vinecopula_sample(
                        x=self.residuals_,
                        n_samples=h,
                        method=self.type_pi,
                        random_state=self.seed + 100 * i,
                    )
                    for i in tqdm(range(self.replications))
                )
            elif self.verbose == 0:
                self.residuals_sims_ = tuple(
                    vinecopula_sample(
                        x=self.residuals_,
                        n_samples=h,
                        method=self.type_pi,
                        random_state=self.seed + 100 * i,
                    )
                    for i in range(self.replications)
                )

        # recursive one-step-ahead forecasting loop over the horizon
        mean_ = deepcopy(self.mean_)

        for i in range(h):
            new_obs = ts.reformat_response(mean_, self.lags)
            new_X = new_obs.reshape(1, -1)
            cooked_new_X = self.cook_test_set(new_X, **kwargs)

            # NOTE(review): the comprehensions below rebind `i` (the horizon
            # index) as the series index within this iteration — confirm intended.
            if "return_std" in kwargs:
                self.preds_std_.append(
                    [
                        np.asarray(
                            self.fit_objs_[i].predict(
                                cooked_new_X, return_std=True
                            )[1]
                        ).item()
                        for i in range(self.n_series)
                    ]
                )

            if "return_pi" in kwargs:
                for i in range(self.n_series):
                    preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs)
                    mean_pi_.append(preds_pi.mean[0])
                    lower_pi_.append(preds_pi.lower[0])
                    upper_pi_.append(preds_pi.upper[0])

            if self.type_pi != "quantile":
                predicted_cooked_new_X = np.asarray(
                    [
                        np.asarray(
                            self.fit_objs_[i].predict(cooked_new_X)
                        ).item()
                        for i in range(self.init_n_series_)
                    ]
                )
            else:
                predicted_cooked_new_X = np.asarray(
                    [
                        np.asarray(
                            self.fit_objs_[i]
                            .predict(cooked_new_X, return_pi=True)
                            .upper
                        ).item()
                        for i in range(self.init_n_series_)
                    ]
                )

            # add back the per-series means removed at fit time
            preds = np.asarray(y_means_ + predicted_cooked_new_X)

            # Create full row with both predictions and external regressors
            if self.xreg_ is not None and "xreg" in kwargs:
                next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten()
                full_row = np.concatenate([preds, next_xreg])
            else:
                full_row = preds

            # Create a new row with same number of columns as mean_
            new_row = np.zeros((1, mean_.shape[1]))
            new_row[0, : full_row.shape[0]] = full_row

            # Maintain the full dimensionality by using vstack instead of rbind
            mean_ = np.vstack([new_row, mean_[:-1]])

        # Final output should only include the target columns
        self.mean_ = pd.DataFrame(
            mean_[0 : min(h, self.n_obs_ - self.lags), : self.init_n_series_][
                ::-1
            ],
            columns=self.df_.columns[: self.init_n_series_],
            index=self.output_dates_,
        )

        # function's return ----------------------------------------------------------------------
        if (
            (("return_std" not in kwargs) and ("return_pi" not in kwargs))
            and (self.type_pi not in ("gaussian", "scp"))
        ) or ("vine" in self.type_pi):
            if self.replications is None:
                return self.mean_.iloc[:, : self.init_n_series_]

            # if "return_std" not in kwargs and self.replications is not None
            meanf = []
            medianf = []
            lower = []
            upper = []

            # build simulated forecast paths: mean forecast + residual sims
            if "scp2" in self.type_pi:
                if self.verbose == 1:
                    self.sims_ = tuple(
                        (
                            self.mean_
                            + self.residuals_sims_[i]
                            * self.residuals_std_dev_[np.newaxis, :]
                            for i in tqdm(range(self.replications))
                        )
                    )
                elif self.verbose == 0:
                    self.sims_ = tuple(
                        (
                            self.mean_
                            + self.residuals_sims_[i]
                            * self.residuals_std_dev_[np.newaxis, :]
                            for i in range(self.replications)
                        )
                    )
            else:
                if self.verbose == 1:
                    self.sims_ = tuple(
                        (
                            self.mean_ + self.residuals_sims_[i]
                            for i in tqdm(range(self.replications))
                        )
                    )
                elif self.verbose == 0:
                    self.sims_ = tuple(
                        (
                            self.mean_ + self.residuals_sims_[i]
                            for i in range(self.replications)
                        )
                    )

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )
            # per-series aggregation of simulations into mean/median + quantile bounds
            for ix in range(self.init_n_series_):
                sims_ix = getsims(self.sims_, ix)
                if self.agg == "mean":
                    meanf.append(np.mean(sims_ix, axis=1))
                else:
                    medianf.append(np.median(sims_ix, axis=1))
                lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
                upper.append(
                    np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)
                )
            self.mean_ = pd.DataFrame(
                np.asarray(meanf).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            self.lower_ = pd.DataFrame(
                np.asarray(lower).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

            try:
                self.median_ = pd.DataFrame(
                    np.asarray(medianf).T,
                    columns=self.series_names[
                        : self.init_n_series_
                    ],  # self.df_.columns,
                    index=self.output_dates_,
                )
            except Exception as e:
                # medianf is empty when agg == "mean"
                pass

            return DescribeResult(
                self.mean_, self.sims_, self.lower_, self.upper_
            )

        if (
            (("return_std" in kwargs) or ("return_pi" in kwargs))
            and (self.type_pi not in ("gaussian", "scp"))
        ) or "vine" in self.type_pi:
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            if "return_std" in kwargs:
                self.preds_std_ = np.asarray(self.preds_std_)

                self.lower_ = pd.DataFrame(
                    self.mean_.values - pi_multiplier * self.preds_std_,
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

                self.upper_ = pd.DataFrame(
                    self.mean_.values + pi_multiplier * self.preds_std_,
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

            if "return_pi" in kwargs:
                self.lower_ = pd.DataFrame(
                    np.asarray(lower_pi_).reshape(h, self.n_series)
                    + y_means_[np.newaxis, :],
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

                self.upper_ = pd.DataFrame(
                    np.asarray(upper_pi_).reshape(h, self.n_series)
                    + y_means_[np.newaxis, :],
                    columns=self.series_names,  # self.df_.columns,
                    index=self.output_dates_,
                )

            res = DescribeResult(self.mean_, self.lower_, self.upper_)

            # strip the appended external-regressor columns from the output
            if self.xreg_ is not None:
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0], res2[1], res2[2])

            return res

        if self.type_pi == "gaussian":
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            # Use Bayesian std if available, otherwise use gaussian residual std
            if "return_std" in kwargs and len(self.preds_std_) > 0:
                preds_std_to_use = np.asarray(self.preds_std_)
            else:
                preds_std_to_use = self.gaussian_preds_std_

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * preds_std_to_use,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * preds_std_to_use,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            res = DescribeResult(self.mean_, self.lower_, self.upper_)

            if self.xreg_ is not None:
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0], res2[1], res2[2])

            return res

        if self.type_pi == "quantile":
            # ("mean") is a plain string here, which namedtuple accepts as a
            # single field name
            DescribeResult = namedtuple("DescribeResult", ("mean"))

            self.mean_ = pd.DataFrame(
                np.asarray(self.mean_),
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            res = DescribeResult(self.mean_)

            if self.xreg_ is not None:
                if len(self.xreg_.shape) > 1:
                    res2 = mx.tuple_map(
                        res,
                        lambda x: mo.delete_last_columns(
                            x, num_columns=self.xreg_.shape[1]
                        ),
                    )
                else:
                    res2 = mx.tuple_map(
                        res, lambda x: mo.delete_last_columns(x, num_columns=1)
                    )
                return DescribeResult(res2[0])

            return res

        # After prediction loop, ensure sims only contain target columns
        if self.sims_ is not None:
            if self.verbose == 1:
                self.sims_ = tuple(
                    sim[:h,]  # Only keep target columns and h rows
                    for sim in tqdm(self.sims_)
                )
            elif self.verbose == 0:
                self.sims_ = tuple(
                    sim[:h,]  # Only keep target columns and h rows
                    for sim in self.sims_
                )

            # Convert numpy arrays to DataFrames with proper columns
            self.sims_ = tuple(
                pd.DataFrame(
                    sim,
                    columns=self.df_.columns[: self.init_n_series_],
                    index=self.output_dates_,
                )
                for sim in self.sims_
            )

        if self.type_pi in (
            "kde",
            "bootstrap",
            "block-bootstrap",
            "vine-copula",
        ):
            if self.xreg_ is not None:
                # Use getsimsxreg when external regressors are present
                target_cols = self.df_.columns[: self.init_n_series_]
                self.sims_ = getsimsxreg(
                    self.sims_, self.output_dates_, target_cols
                )
            else:
                # Use original getsims for backward compatibility
                self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
1566 def score( 1567 self, 1568 X, 1569 training_index, 1570 testing_index, 1571 scoring=None, 1572 alpha=0.5, 1573 **kwargs, 1574 ): 1575 """Train on training_index, score on testing_index.""" 1576 1577 assert ( 1578 bool(set(training_index).intersection(set(testing_index))) == False 1579 ), "Non-overlapping 'training_index' and 'testing_index' required" 1580 1581 # Dimensions 1582 try: 1583 # multivariate time series 1584 n, p = X.shape 1585 except: 1586 # univariate time series 1587 n = X.shape[0] 1588 p = 1 1589 1590 # Training and testing sets 1591 if p > 1: 1592 X_train = X[training_index, :] 1593 X_test = X[testing_index, :] 1594 else: 1595 X_train = X[training_index] 1596 X_test = X[testing_index] 1597 1598 # Horizon 1599 h = len(testing_index) 1600 assert ( 1601 len(training_index) + h 1602 ) <= n, "Please check lengths of training and testing windows" 1603 1604 # Fit and predict 1605 self.fit(X_train, **kwargs) 1606 preds = self.predict(h=h, **kwargs) 1607 1608 if scoring is None: 1609 scoring = "neg_root_mean_squared_error" 1610 1611 if scoring == "pinball": 1612 # Predict requested quantile 1613 q_pred = self.predict(h=h, quantiles=[alpha], **kwargs) 1614 # Handle multivariate 1615 scores = [] 1616 for j in range(p): 1617 series_name = getattr(self, "series_names", [f"Series_{j}"])[j] 1618 q_label = ( 1619 f"{int(alpha * 100):02d}" 1620 if (alpha * 100).is_integer() 1621 else f"{alpha:.3f}".replace(".", "_") 1622 ) 1623 col = f"quantile_{q_label}_{series_name}" 1624 if col not in q_pred.columns: 1625 raise ValueError( 1626 f"Column '{col}' not found in quantile forecast output." 
1627 ) 1628 y_true_j = X_test[:, j] 1629 y_pred_j = q_pred[col].values 1630 # Compute pinball loss for this series 1631 loss = mean_pinball_loss(y_true_j, y_pred_j, alpha=alpha) 1632 scores.append(loss) 1633 # Return average over series 1634 return np.mean(scores) 1635 1636 if scoring == "crps": 1637 # Ensure simulations exist 1638 preds = self.predict(h=h, **kwargs) # triggers self.sims_ 1639 # Extract simulations: list of DataFrames → (R, h, p) 1640 sims_vals = np.stack( 1641 [sim.values for sim in self.sims_], axis=0 1642 ) # (R, h, p) 1643 crps_scores = [] 1644 for j in range(p): 1645 y_true_j = X_test[:, j] 1646 sims_j = sims_vals[:, :, j] # (R, h) 1647 crps_j = self._crps_ensemble(np.asarray(y_true_j), sims_j) 1648 crps_scores.append(np.mean(crps_j)) # average over horizon 1649 return np.mean(crps_scores) # average over series 1650 1651 # check inputs 1652 assert scoring in ( 1653 "explained_variance", 1654 "neg_mean_absolute_error", 1655 "neg_mean_squared_error", 1656 "neg_root_mean_squared_error", 1657 "neg_mean_squared_log_error", 1658 "neg_median_absolute_error", 1659 "r2", 1660 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1661 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1662 'neg_median_absolute_error', 'r2')" 1663 1664 scoring_options = { 1665 "explained_variance": skm2.explained_variance_score, 1666 "neg_mean_absolute_error": skm2.mean_absolute_error, 1667 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1668 "neg_root_mean_squared_error": lambda x, y: np.sqrt( 1669 np.mean((x - y) ** 2) 1670 ), 1671 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1672 "neg_median_absolute_error": skm2.median_absolute_error, 1673 "r2": skm2.r2_score, 1674 } 1675 1676 return scoring_options[scoring](X_test, preds)
Train on training_index, score on testing_index.
class MTSStacker(MTS):
    """
    Sequential stacking for time series with unified strategy.

    Core Strategy:
    1. Split data: half1 (base models) | half2 (meta-model)
    2. Train base models on half1, predict half2
    3. Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]
       Stack as additional time series, extract target series
    4. Train meta-MTS on half2 with augmented data
    5. Retrain base models on half2 for temporal alignment
    6. At prediction: base models forecast → augment → meta-model predicts
    """

    def __init__(
        self,
        base_models,
        meta_model,
        split_ratio=0.5,
    ):
        """
        Parameters
        ----------
        base_models : list of sklearn-compatible models
            Base models (e.g., Ridge, Lasso, RandomForest)
        meta_model : nnetsauce.MTS instance
            MTS with type_pi='scp2-kde' or similar
        split_ratio : float
            Proportion for half1 (default: 0.5)
        """
        self.base_models = base_models
        self.meta_model = meta_model
        self.split_ratio = split_ratio
        self.fitted_base_models_ = []  # MTS wrappers refit on half2 (set in fit)
        self.split_idx_ = None  # row index of the half1 | half2 boundary
        self.mean_ = None  # point forecasts DataFrame (set in predict)
        self.lower_ = None  # lower interval bounds (set in predict, if any)
        self.upper_ = None  # upper interval bounds (set in predict, if any)
        self.sims_ = None  # tuple of per-step simulations (set in predict, if any)
        self.output_dates_ = None  # forecast index (set in fit and predict)

    def fit(self, X, xreg=None, **kwargs):
        """
        Fit MTSStacker using sequential stacking strategy.

        Parameters
        ----------
        X : array-like or DataFrame, shape (n_samples, n_features)
            Training time series (most recent observations last)
        xreg : array-like, optional
            External regressors
        **kwargs : dict
            Additional parameters for base and meta models

        Returns
        -------
        self : object
        """
        # 1. Store attributes and convert to DataFrame if needed
        if isinstance(X, pd.DataFrame):
            self.df_ = X.copy()
            X_array = X.values
            self.series_names = X.columns.tolist()
        else:
            # NOTE(review): this branch assumes 2-D input; a 1-D ndarray would
            # fail on shape[1] here even though n_series_ below handles ndim==1.
            X_array = np.asarray(X)
            self.df_ = pd.DataFrame(X_array)
            self.series_names = [f"series{i}" for i in range(X_array.shape[1])]

        n_samples = X_array.shape[0]
        self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1

        # 2. Split data into half1 and half2
        split_idx = int(n_samples * self.split_ratio)
        self.split_idx_ = split_idx

        if split_idx < self.meta_model.lags:
            raise ValueError(
                f"Split creates insufficient data: split_idx={split_idx} < "
                f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags."
            )

        half1 = X_array[:split_idx]
        half2 = X_array[split_idx:]

        # 3. Train base models on half1 and predict half2
        base_preds = []
        temp_base_models = []

        for base_model in self.base_models:
            # Wrap in MTS with same config as meta_model
            base_mts = MTS(
                obj=clone(base_model),
                lags=self.meta_model.lags,
                n_hidden_features=self.meta_model.n_hidden_features,
                replications=self.meta_model.replications,
                kernel=self.meta_model.kernel,
                type_pi=None,  # No prediction intervals for base models
            )
            base_mts.fit(half1)

            # Predict half2
            pred = base_mts.predict(h=len(half2))

            # Handle different return types
            if isinstance(pred, pd.DataFrame):
                base_preds.append(pred.values)
            elif isinstance(pred, np.ndarray):
                base_preds.append(pred)
            elif hasattr(pred, "mean"):
                # Named tuple with mean attribute
                mean_pred = pred.mean
                base_preds.append(
                    mean_pred.values
                    if isinstance(mean_pred, pd.DataFrame)
                    else mean_pred
                )
            else:
                raise ValueError(f"Unexpected prediction type: {type(pred)}")

            temp_base_models.append(base_mts)
        # NOTE(review): temp_base_models is collected but never used after the
        # loop — confirm whether it should be exposed or removed.

        # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...]
        base_preds_array = np.hstack(
            base_preds
        )  # shape: (len(half2), n_series * n_base_models)

        if isinstance(X, pd.DataFrame):
            half2_df = pd.DataFrame(
                half2,
                index=self.df_.index[split_idx:],
                columns=self.series_names,
            )
            base_preds_df = pd.DataFrame(
                base_preds_array,
                index=self.df_.index[split_idx:],
                columns=[
                    f"base_{i}_{j}"
                    for i in range(len(self.base_models))
                    for j in range(self.n_series_)
                ],
            )
            augmented = pd.concat([half2_df, base_preds_df], axis=1)
        else:
            augmented = np.hstack([half2, base_preds_array])

        # 5. Train meta-model on augmented half2
        self.meta_model.fit(augmented, xreg=xreg, **kwargs)

        # Store meta-model attributes (mirror its fitted state on the stacker)
        self.output_dates_ = self.meta_model.output_dates_
        self.fit_objs_ = self.meta_model.fit_objs_
        self.y_ = self.meta_model.y_
        self.X_ = self.meta_model.X_
        self.xreg_ = self.meta_model.xreg_
        self.y_means_ = self.meta_model.y_means_
        self.residuals_ = self.meta_model.residuals_

        # 6. FIXED: Retrain base models on half2 for temporal alignment
        self.fitted_base_models_ = []
        for i, base_model in enumerate(self.base_models):
            base_mts_final = MTS(
                obj=clone(base_model),
                lags=self.meta_model.lags,
                n_hidden_features=self.meta_model.n_hidden_features,
                replications=self.meta_model.replications,
                kernel=self.meta_model.kernel,
                type_pi=None,
            )
            base_mts_final.fit(half2)
            self.fitted_base_models_.append(base_mts_final)

        return self

    def predict(self, h=5, level=95, **kwargs):
        """
        Forecast h steps ahead using stacked predictions.

        FIXED: Now properly generates base model forecasts and uses them
        to create augmented features for the meta-model.

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        **kwargs : dict
            Additional parameters for prediction

        Returns
        -------
        DescribeResult or DataFrame
            Predictions with optional intervals/simulations
        """
        # Step 1: Generate base model forecasts for horizon h
        base_forecasts = []

        for base_mts in self.fitted_base_models_:
            # Each base model forecasts h steps ahead
            forecast = base_mts.predict(h=h)

            # Extract mean prediction
            if isinstance(forecast, pd.DataFrame):
                base_forecasts.append(forecast.values)
            elif isinstance(forecast, np.ndarray):
                base_forecasts.append(forecast)
            elif hasattr(forecast, "mean"):
                mean_pred = forecast.mean
                base_forecasts.append(
                    mean_pred.values
                    if isinstance(mean_pred, pd.DataFrame)
                    else mean_pred
                )
            else:
                raise ValueError(f"Unexpected forecast type: {type(forecast)}")

        # Step 2: Stack base forecasts into augmented features
        base_forecasts_array = np.hstack(
            base_forecasts
        )  # shape: (h, n_series * n_base)

        # Step 3: Create augmented input for meta-model
        # The meta-model needs the original series structure + base predictions
        # We use recursive forecasting: predict one step, update history, repeat

        # Get last window of data from training
        last_window = self.df_.iloc[-self.meta_model.lags:].values

        # Initialize containers for results
        all_forecasts = []
        all_lowers = [] if level is not None else None
        all_uppers = [] if level is not None else None
        all_sims = (
            []
            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
            else None
        )

        # Recursive forecasting
        current_window = last_window.copy()

        for step in range(h):
            # Create augmented input: [current_window_last_row | base_forecast_step]
            # Note: meta-model was trained on [original | base_preds]
            # For prediction, we need to simulate this structure

            # Use the base forecast for this step
            base_forecast_step = base_forecasts_array[
                step: step + 1, :
            ]  # shape: (1, n_base_features)

            # Create a dummy augmented dataset for this step
            # Combine last observed values with base predictions
            last_obs = current_window[-1:, :]  # shape: (1, n_series)
            augmented_step = np.hstack([last_obs, base_forecast_step])

            # Convert to DataFrame if needed
            if isinstance(self.df_, pd.DataFrame):
                augmented_df = pd.DataFrame(
                    augmented_step,
                    columns=(
                        self.series_names
                        + [
                            f"base_{i}_{j}"
                            for i in range(len(self.base_models))
                            for j in range(self.n_series_)
                        ]
                    ),
                )
            else:
                augmented_df = augmented_step

            # Predict one step with meta-model
            # This is tricky: we need to use meta-model's internal predict
            # but with our augmented data structure

            # For now, use the standard predict and extract one step
            # NOTE(review): augmented_df is constructed above but never passed
            # to the meta-model, so this call returns the SAME one-step
            # forecast on every iteration of the loop — confirm intended.
            step_result = self.meta_model.predict(h=1, level=level, **kwargs)

            # Extract forecasts
            if isinstance(step_result, pd.DataFrame):
                forecast_step = step_result.iloc[0, : self.n_series_].values
                all_forecasts.append(forecast_step)
            elif isinstance(step_result, np.ndarray):
                forecast_step = step_result[0, : self.n_series_]
                all_forecasts.append(forecast_step)
            elif hasattr(step_result, "mean"):
                mean_pred = step_result.mean
                if isinstance(mean_pred, pd.DataFrame):
                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
                else:
                    forecast_step = mean_pred[0, : self.n_series_]
                all_forecasts.append(forecast_step)
            # NOTE(review): if step_result matches none of the branches above,
            # forecast_step is left unbound and the vstack below raises
            # NameError — consider an explicit else raising ValueError.

            # Extract intervals if available
            if hasattr(step_result, "lower") and all_lowers is not None:
                lower_pred = step_result.lower
                if isinstance(lower_pred, pd.DataFrame):
                    all_lowers.append(
                        lower_pred.iloc[0, : self.n_series_].values
                    )
                else:
                    all_lowers.append(lower_pred[0, : self.n_series_])

            if hasattr(step_result, "upper") and all_uppers is not None:
                upper_pred = step_result.upper
                if isinstance(upper_pred, pd.DataFrame):
                    all_uppers.append(
                        upper_pred.iloc[0, : self.n_series_].values
                    )
                else:
                    all_uppers.append(upper_pred[0, : self.n_series_])

            # Extract simulations if available
            if hasattr(step_result, "sims") and all_sims is not None:
                all_sims.append(step_result.sims)

            # Update window for next iteration
            current_window = np.vstack(
                [current_window[1:], forecast_step.reshape(1, -1)]
            )

        # Combine all forecasts
        forecasts_array = np.array(all_forecasts)

        # Create output dates
        if hasattr(self.df_, "index") and isinstance(
            self.df_.index, pd.DatetimeIndex
        ):
            last_date = self.df_.index[-1]
            freq = pd.infer_freq(self.df_.index)
            if freq:
                output_dates = pd.date_range(
                    start=last_date, periods=h + 1, freq=freq
                )[1:]
            else:
                # frequency could not be inferred: fall back to integer index
                output_dates = pd.RangeIndex(
                    start=len(self.df_), stop=len(self.df_) + h
                )
        else:
            output_dates = pd.RangeIndex(
                start=len(self.df_), stop=len(self.df_) + h
            )

        self.output_dates_ = output_dates

        # Format output
        mean_df = pd.DataFrame(
            forecasts_array,
            index=output_dates,
            columns=self.series_names[: self.n_series_],
        )
        self.mean_ = mean_df

        # Return based on what was computed
        if all_lowers and all_uppers:
            lowers_array = np.array(all_lowers)
            uppers_array = np.array(all_uppers)

            lower_df = pd.DataFrame(
                lowers_array,
                index=output_dates,
                columns=self.series_names[: self.n_series_],
            )
            upper_df = pd.DataFrame(
                uppers_array,
                index=output_dates,
                columns=self.series_names[: self.n_series_],
            )

            self.lower_ = lower_df
            self.upper_ = upper_df

            if all_sims:
                self.sims_ = tuple(all_sims)
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "sims", "lower", "upper")
                )
                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
            else:
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "lower", "upper")
                )
                return DescribeResult(mean_df, lower_df, upper_df)
        else:
            return mean_df

    def plot(self, series=None, **kwargs):
        """
        Plot the time series with forecasts and prediction intervals.

        Parameters
        ----------
        series : str or int, optional
            Name or index of the series to plot (default: 0)
        **kwargs : dict
            Additional parameters for plotting
        """
        # Ensure we have predictions
        if self.mean_ is None:
            raise ValueError(
                "Model forecasting must be obtained first (call predict)"
            )

        # Convert series name to index if needed
        if isinstance(series, str):
            if series in self.series_names:
                series_idx = self.series_names.index(series)
            else:
                raise ValueError(
                    f"Series '{series}' doesn't exist in the input dataset"
                )
        else:
            series_idx = series if series is not None else 0

        # Check bounds
        if series_idx < 0 or series_idx >= self.n_series_:
            raise ValueError(
                f"Series index {series_idx} is out of bounds (0 to {self.n_series_ - 1})"
            )

        # Prepare data for plotting (local import keeps matplotlib optional)
        import matplotlib.pyplot as plt
        import matplotlib.dates as mdates

        # Get historical data
        historical_data = self.df_.iloc[:, series_idx]
        forecast_data = self.mean_.iloc[:, series_idx]

        # Get prediction intervals if available
        has_intervals = self.lower_ is not None and self.upper_ is not None
        if has_intervals:
            lower_data = self.lower_.iloc[:, series_idx]
            upper_data = self.upper_.iloc[:, series_idx]

        # Create figure
        fig, ax = plt.subplots(figsize=(12, 6))

        # Plot historical data
        if isinstance(self.df_.index, pd.DatetimeIndex):
            hist_index = self.df_.index
            ax.plot(
                hist_index,
                historical_data,
                "-",
                label="Historical",
                color="blue",
                linewidth=1.5,
            )

            # Plot forecast
            forecast_index = self.mean_.index
            ax.plot(
                forecast_index,
                forecast_data,
                "-",
                label="Forecast",
                color="red",
                linewidth=1.5,
            )

            # Plot prediction intervals
            if has_intervals:
                ax.fill_between(
                    forecast_index,
                    lower_data,
                    upper_data,
                    alpha=0.3,
                    color="red",
                    label="Prediction Interval",
                )

            # Add vertical line at the split point
            if self.split_idx_ is not None:
                split_date = hist_index[self.split_idx_]
                ax.axvline(
                    x=split_date,
                    color="gray",
                    linestyle="--",
                    alpha=0.5,
                    label="Train Split",
                )

            # Format x-axis for dates
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
            fig.autofmt_xdate()
        else:
            # Numeric indices
            n_points_train = len(self.df_)
            n_points_forecast = len(self.mean_)

            x_hist = np.arange(n_points_train)
            x_forecast = np.arange(
                n_points_train, n_points_train + n_points_forecast
            )

            ax.plot(
                x_hist,
                historical_data,
                "-",
                label="Historical",
                color="blue",
                linewidth=1.5,
            )
            ax.plot(
                x_forecast,
                forecast_data,
                "-",
                label="Forecast",
                color="red",
                linewidth=1.5,
            )

            if has_intervals:
                ax.fill_between(
                    x_forecast,
                    lower_data,
                    upper_data,
                    alpha=0.3,
                    color="red",
                    label="Prediction Interval",
                )

            if self.split_idx_ is not None:
                ax.axvline(
                    x=self.split_idx_,
                    color="gray",
                    linestyle="--",
                    alpha=0.5,
                    label="Train Split",
                )

        # Set title and labels
        series_name = (
            self.series_names[series_idx]
            if series_idx < len(self.series_names)
            else f"Series {series_idx}"
        )
        plt.title(f"Forecast for {series_name}", fontsize=14, fontweight="bold")
        plt.xlabel("Time")
        plt.ylabel("Value")
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()
Sequential stacking for time series with unified strategy.
Core Strategy:
- Split data: half1 (base models) | half2 (meta-model)
- Train base models on half1, predict half2
- Create augmented dataset: [original_series | base_pred_1 | base_pred_2 | ...]; stack the base-model predictions as additional time series, then extract the target series
- Train meta-MTS on half2 with augmented data
- Retrain base models on half2 for temporal alignment
- At prediction: base models forecast → augment → meta-model predicts
53 def fit(self, X, xreg=None, **kwargs): 54 """ 55 Fit MTSStacker using sequential stacking strategy. 56 57 Parameters 58 ---------- 59 X : array-like or DataFrame, shape (n_samples, n_features) 60 Training time series (most recent observations last) 61 xreg : array-like, optional 62 External regressors 63 **kwargs : dict 64 Additional parameters for base and meta models 65 66 Returns 67 ------- 68 self : object 69 """ 70 # 1. Store attributes and convert to DataFrame if needed 71 if isinstance(X, pd.DataFrame): 72 self.df_ = X.copy() 73 X_array = X.values 74 self.series_names = X.columns.tolist() 75 else: 76 X_array = np.asarray(X) 77 self.df_ = pd.DataFrame(X_array) 78 self.series_names = [f"series{i}" for i in range(X_array.shape[1])] 79 80 n_samples = X_array.shape[0] 81 self.n_series_ = X_array.shape[1] if X_array.ndim > 1 else 1 82 83 # 2. Split data into half1 and half2 84 split_idx = int(n_samples * self.split_ratio) 85 self.split_idx_ = split_idx 86 87 if split_idx < self.meta_model.lags: 88 raise ValueError( 89 f"Split creates insufficient data: split_idx={split_idx} < " 90 f"lags={self.meta_model.lags}. Reduce split_ratio or use fewer lags." 91 ) 92 93 half1 = X_array[:split_idx] 94 half2 = X_array[split_idx:] 95 96 # 3. 
Train base models on half1 and predict half2 97 base_preds = [] 98 temp_base_models = [] 99 100 for base_model in self.base_models: 101 # Wrap in MTS with same config as meta_model 102 base_mts = MTS( 103 obj=clone(base_model), 104 lags=self.meta_model.lags, 105 n_hidden_features=self.meta_model.n_hidden_features, 106 replications=self.meta_model.replications, 107 kernel=self.meta_model.kernel, 108 type_pi=None, # No prediction intervals for base models 109 ) 110 base_mts.fit(half1) 111 112 # Predict half2 113 pred = base_mts.predict(h=len(half2)) 114 115 # Handle different return types 116 if isinstance(pred, pd.DataFrame): 117 base_preds.append(pred.values) 118 elif isinstance(pred, np.ndarray): 119 base_preds.append(pred) 120 elif hasattr(pred, "mean"): 121 # Named tuple with mean attribute 122 mean_pred = pred.mean 123 base_preds.append( 124 mean_pred.values 125 if isinstance(mean_pred, pd.DataFrame) 126 else mean_pred 127 ) 128 else: 129 raise ValueError(f"Unexpected prediction type: {type(pred)}") 130 131 temp_base_models.append(base_mts) 132 133 # 4. Create augmented dataset: [original | base_pred_1 | base_pred_2 | ...] 134 base_preds_array = np.hstack( 135 base_preds 136 ) # shape: (len(half2), n_series * n_base_models) 137 138 if isinstance(X, pd.DataFrame): 139 half2_df = pd.DataFrame( 140 half2, 141 index=self.df_.index[split_idx:], 142 columns=self.series_names, 143 ) 144 base_preds_df = pd.DataFrame( 145 base_preds_array, 146 index=self.df_.index[split_idx:], 147 columns=[ 148 f"base_{i}_{j}" 149 for i in range(len(self.base_models)) 150 for j in range(self.n_series_) 151 ], 152 ) 153 augmented = pd.concat([half2_df, base_preds_df], axis=1) 154 else: 155 augmented = np.hstack([half2, base_preds_array]) 156 157 # 5. 
Train meta-model on augmented half2 158 self.meta_model.fit(augmented, xreg=xreg, **kwargs) 159 160 # Store meta-model attributes 161 self.output_dates_ = self.meta_model.output_dates_ 162 self.fit_objs_ = self.meta_model.fit_objs_ 163 self.y_ = self.meta_model.y_ 164 self.X_ = self.meta_model.X_ 165 self.xreg_ = self.meta_model.xreg_ 166 self.y_means_ = self.meta_model.y_means_ 167 self.residuals_ = self.meta_model.residuals_ 168 169 # 6. FIXED: Retrain base models on half2 for temporal alignment 170 self.fitted_base_models_ = [] 171 for i, base_model in enumerate(self.base_models): 172 base_mts_final = MTS( 173 obj=clone(base_model), 174 lags=self.meta_model.lags, 175 n_hidden_features=self.meta_model.n_hidden_features, 176 replications=self.meta_model.replications, 177 kernel=self.meta_model.kernel, 178 type_pi=None, 179 ) 180 base_mts_final.fit(half2) 181 self.fitted_base_models_.append(base_mts_final) 182 183 return self
Fit MTSStacker using sequential stacking strategy.
Parameters
X : array-like or DataFrame, shape (n_samples, n_features)
    Training time series (most recent observations last)
xreg : array-like, optional
    External regressors
**kwargs : dict
    Additional parameters for base and meta models
Returns
self : object
    def predict(self, h=5, level=95, **kwargs):
        """
        Forecast h steps ahead using stacked predictions.

        FIXED: Now properly generates base model forecasts and uses them
        to create augmented features for the meta-model.

        Parameters
        ----------
        h : int
            Forecast horizon
        level : int
            Confidence level for prediction intervals
        **kwargs : dict
            Additional parameters for prediction

        Returns
        -------
        DescribeResult or DataFrame
            Predictions with optional intervals/simulations
        """
        # Step 1: Generate base model forecasts for horizon h
        base_forecasts = []

        for base_mts in self.fitted_base_models_:
            # Each base model forecasts h steps ahead
            forecast = base_mts.predict(h=h)

            # Extract mean prediction
            if isinstance(forecast, pd.DataFrame):
                base_forecasts.append(forecast.values)
            elif isinstance(forecast, np.ndarray):
                base_forecasts.append(forecast)
            elif hasattr(forecast, "mean"):
                mean_pred = forecast.mean
                base_forecasts.append(
                    mean_pred.values
                    if isinstance(mean_pred, pd.DataFrame)
                    else mean_pred
                )
            else:
                raise ValueError(f"Unexpected forecast type: {type(forecast)}")

        # Step 2: Stack base forecasts into augmented features
        base_forecasts_array = np.hstack(
            base_forecasts
        )  # shape: (h, n_series * n_base)

        # Step 3: Create augmented input for meta-model
        # The meta-model needs the original series structure + base predictions
        # We use recursive forecasting: predict one step, update history, repeat

        # Get last window of data from training
        last_window = self.df_.iloc[-self.meta_model.lags:].values

        # Initialize containers for results
        all_forecasts = []
        all_lowers = [] if level is not None else None
        all_uppers = [] if level is not None else None
        all_sims = (
            []
            if hasattr(self.meta_model, "type_pi") and self.meta_model.type_pi
            else None
        )

        # Recursive forecasting
        current_window = last_window.copy()

        for step in range(h):
            # Create augmented input: [current_window_last_row | base_forecast_step]
            # Note: meta-model was trained on [original | base_preds]
            # For prediction, we need to simulate this structure

            # Use the base forecast for this step
            base_forecast_step = base_forecasts_array[
                step: step + 1, :
            ]  # shape: (1, n_base_features)

            # Create a dummy augmented dataset for this step
            # Combine last observed values with base predictions
            last_obs = current_window[-1:, :]  # shape: (1, n_series)
            augmented_step = np.hstack([last_obs, base_forecast_step])

            # Convert to DataFrame if needed
            if isinstance(self.df_, pd.DataFrame):
                augmented_df = pd.DataFrame(
                    augmented_step,
                    columns=(
                        self.series_names
                        + [
                            f"base_{i}_{j}"
                            for i in range(len(self.base_models))
                            for j in range(self.n_series_)
                        ]
                    ),
                )
            else:
                augmented_df = augmented_step

            # Predict one step with meta-model
            # This is tricky: we need to use meta-model's internal predict
            # but with our augmented data structure

            # For now, use the standard predict and extract one step
            # NOTE(review): augmented_df is constructed above but never passed
            # to the meta-model, so this call returns the SAME one-step
            # forecast on every iteration of the loop — confirm intended.
            step_result = self.meta_model.predict(h=1, level=level, **kwargs)

            # Extract forecasts
            if isinstance(step_result, pd.DataFrame):
                forecast_step = step_result.iloc[0, : self.n_series_].values
                all_forecasts.append(forecast_step)
            elif isinstance(step_result, np.ndarray):
                forecast_step = step_result[0, : self.n_series_]
                all_forecasts.append(forecast_step)
            elif hasattr(step_result, "mean"):
                mean_pred = step_result.mean
                if isinstance(mean_pred, pd.DataFrame):
                    forecast_step = mean_pred.iloc[0, : self.n_series_].values
                else:
                    forecast_step = mean_pred[0, : self.n_series_]
                all_forecasts.append(forecast_step)
            # NOTE(review): if step_result matches none of the branches above,
            # forecast_step is left unbound and the vstack below raises
            # NameError — consider an explicit else raising ValueError.

            # Extract intervals if available
            if hasattr(step_result, "lower") and all_lowers is not None:
                lower_pred = step_result.lower
                if isinstance(lower_pred, pd.DataFrame):
                    all_lowers.append(
                        lower_pred.iloc[0, : self.n_series_].values
                    )
                else:
                    all_lowers.append(lower_pred[0, : self.n_series_])

            if hasattr(step_result, "upper") and all_uppers is not None:
                upper_pred = step_result.upper
                if isinstance(upper_pred, pd.DataFrame):
                    all_uppers.append(
                        upper_pred.iloc[0, : self.n_series_].values
                    )
                else:
                    all_uppers.append(upper_pred[0, : self.n_series_])

            # Extract simulations if available
            if hasattr(step_result, "sims") and all_sims is not None:
                all_sims.append(step_result.sims)

            # Update window for next iteration
            current_window = np.vstack(
                [current_window[1:], forecast_step.reshape(1, -1)]
            )

        # Combine all forecasts
        forecasts_array = np.array(all_forecasts)

        # Create output dates
        if hasattr(self.df_, "index") and isinstance(
            self.df_.index, pd.DatetimeIndex
        ):
            last_date = self.df_.index[-1]
            freq = pd.infer_freq(self.df_.index)
            if freq:
                output_dates = pd.date_range(
                    start=last_date, periods=h + 1, freq=freq
                )[1:]
            else:
                # frequency could not be inferred: fall back to integer index
                output_dates = pd.RangeIndex(
                    start=len(self.df_), stop=len(self.df_) + h
                )
        else:
            output_dates = pd.RangeIndex(
                start=len(self.df_), stop=len(self.df_) + h
            )

        self.output_dates_ = output_dates

        # Format output
        mean_df = pd.DataFrame(
            forecasts_array,
            index=output_dates,
            columns=self.series_names[: self.n_series_],
        )
        self.mean_ = mean_df

        # Return based on what was computed
        if all_lowers and all_uppers:
            lowers_array = np.array(all_lowers)
            uppers_array = np.array(all_uppers)

            lower_df = pd.DataFrame(
                lowers_array,
                index=output_dates,
                columns=self.series_names[: self.n_series_],
            )
            upper_df = pd.DataFrame(
                uppers_array,
                index=output_dates,
                columns=self.series_names[: self.n_series_],
            )

            self.lower_ = lower_df
            self.upper_ = upper_df

            if all_sims:
                self.sims_ = tuple(all_sims)
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "sims", "lower", "upper")
                )
                return DescribeResult(mean_df, self.sims_, lower_df, upper_df)
            else:
                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "lower", "upper")
                )
                return DescribeResult(mean_df, lower_df, upper_df)
        else:
            return mean_df
Forecast h steps ahead using stacked predictions.
FIXED: Now properly generates base model forecasts and uses them to create augmented features for the meta-model.
Parameters
h : int
    Forecast horizon
level : int
    Confidence level for prediction intervals
**kwargs : dict
    Additional parameters for prediction
Returns
DescribeResult or DataFrame Predictions with optional intervals/simulations
class MultiOutputMTS(MTS):
    """MTS subclass optimized for multivariate time series with vectorized models.

    Enforces n_series >= 2 and uses a single vectorized fit call instead of a
    per-series loop. Works with sklearn models supporting multi-output
    (Ridge, Lasso, LinearRegression, etc.).
    """

    def fit(self, X, xreg=None, **kwargs):
        """Fit with vectorized multi-output model - requires n_series >= 2.

        Args:

            X: {array-like or DataFrame}, shape = [n_samples, n_series]
                Multivariate time series (at least 2 series).

            xreg: {array-like}, optional
                External regressors, column-bound to X when provided.

            **kwargs: additional parameters passed to the residual KDE
                (KernelDensity) when `type_pi` uses 'kde'.

        Returns:

            self: object
        """

        # Validate multivariate input: X.shape[1] raises IndexError on 1-D input
        try:
            self.init_n_series_ = X.shape[1]
        except IndexError:
            raise ValueError(
                "MultiOutputMTS requires multivariate input (n_samples, n_series)"
            )

        if self.init_n_series_ < 2:
            raise ValueError(
                f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}"
            )

        # Automatic lag selection if requested (copied from parent):
        # when self.lags is a string it names an information criterion
        # evaluated by self._compute_information_criterion
        if isinstance(self.lags, str):
            max_lags = min(25, X.shape[0] // 4)
            best_ic = float("inf")
            best_lags = 1

            if self.verbose:
                print(
                    f"\nSelecting optimal number of lags using {self.lags}..."
                )
                iterator = tqdm(range(1, max_lags + 1))
            else:
                iterator = range(1, max_lags + 1)

            for lag in iterator:
                # Series are reversed so most recent observations come first,
                # matching ts.create_train_inputs' expected ordering
                if isinstance(X, pd.DataFrame):
                    X_values = X.values[::-1]
                else:
                    X_values = X[::-1]

                mts_input = ts.create_train_inputs(X_values, lag)
                dummy_y, scaled_Z = self.cook_training_set(
                    y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
                )

                # Vectorized fit for lag selection
                y_means = np.mean(mts_input[0], axis=0)
                centered_y = mts_input[0] - y_means[np.newaxis, :]
                self.obj.fit(X=scaled_Z, y=centered_y)
                residuals = centered_y - self.obj.predict(scaled_Z)
                self.residuals_ = residuals  # Keep (n_obs, n_series) shape

                ic = self._compute_information_criterion(
                    curr_lags=lag, criterion=self.lags
                )

                if self.verbose:
                    print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

                if ic < best_ic:
                    best_ic = ic
                    best_lags = lag

            if self.verbose:
                print(
                    f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}"
                )

            self.lags = best_lags

        # Data preprocessing (from parent)
        self.input_dates = None
        self.df_ = None

        if isinstance(X, pd.DataFrame) is False:
            if xreg is None:
                X = pd.DataFrame(X)
                self.series_names = [
                    "series" + str(i) for i in range(X.shape[1])
                ]
            else:
                # NOTE(review): in this branch series_names is never assigned;
                # a later pd.DataFrame(X, columns=self.series_names) would use a
                # stale attribute value — confirm against parent MTS behavior
                X = mo.cbind(X, xreg)
                self.xreg_ = xreg
        else:
            # NOTE(review): DataFrame.index is never None, so this guard is
            # always true; kept as-is to preserve behavior
            X_index = None
            if X.index is not None:
                X_index = X.index
            if xreg is None:
                X = copy.deepcopy(mo.convert_df_to_numeric(X))
            else:
                X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
                self.xreg_ = xreg
            if X_index is not None:
                X.index = X_index
            self.series_names = X.columns.tolist()

        if isinstance(X, pd.DataFrame):
            if self.df_ is None:
                self.df_ = X
                X = X.values
            else:
                # Append to previously stored data, extending the date index
                input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
                frequency = pd.infer_freq(input_dates_prev)
                self.df_ = pd.concat([self.df_, X], axis=0)
                self.input_dates = pd.date_range(
                    start=input_dates_prev[0],
                    periods=len(input_dates_prev) + X.shape[0],
                    freq=frequency,
                ).values.tolist()
                self.df_.index = self.input_dates
                X = self.df_.values
            self.df_.columns = self.series_names
        else:
            if self.df_ is None:
                self.df_ = pd.DataFrame(X, columns=self.series_names)
            else:
                self.df_ = pd.concat(
                    [self.df_, pd.DataFrame(X, columns=self.series_names)],
                    axis=0,
                )

        self.input_dates = ts.compute_input_dates(self.df_)

        n, p = X.shape
        self.n_obs_ = n
        rep_1_n = np.repeat(1, n)

        # Reset all fitted state before refitting
        self.y_ = None
        self.X_ = None
        self.n_series = p
        self.fit_objs_.clear()
        self.y_means_.clear()
        self.residuals_ = None
        self.residuals_sims_ = None
        self.kde_ = None
        self.sims_ = None
        self.scaled_Z_ = None
        self.centered_y_is_ = []

        # Create training inputs (X reversed: most recent first)
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
        self.y_ = mts_input[0]
        self.X_ = mts_input[1]

        dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)
        self.scaled_Z_ = scaled_Z

        if self.verbose > 0:
            print(
                f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... \n"
            )

        # VECTORIZED FITTING - NO LOOP
        y_means_array = np.array(
            [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)]
        )
        for i in range(self.init_n_series_):
            self.y_means_[i] = y_means_array[i]

        centered_y_all = self.y_ - y_means_array[np.newaxis, :]
        self.centered_y_is_ = [
            centered_y_all[:, i] for i in range(self.init_n_series_)
        ]

        # Single vectorized fit for all series
        self.obj.fit(scaled_Z, centered_y_all)

        # All series share the same model
        for i in range(self.init_n_series_):
            self.fit_objs_[i] = self.obj

        # Vectorized residuals - ONLY target columns (n_obs, n_series)
        preds_all = self.obj.predict(scaled_Z)
        residuals_raw = centered_y_all - preds_all

        # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns
        # In case there's some dimension mismatch, explicitly slice
        self.residuals_ = residuals_raw[:, : self.init_n_series_]

        # Handle type_pi: Gaussian intervals need the per-series residual std
        if self.type_pi == "gaussian":
            self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

        # scp2* variants standardize residuals (centered, unit variance)
        if self.type_pi.startswith("scp2"):
            data_mean = np.mean(self.residuals_, axis=0)
            self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
            self.residuals_ = (
                self.residuals_ - data_mean[np.newaxis, :]
            ) / self.residuals_std_dev_[np.newaxis, :]

        # kde-based simulation: fit a KernelDensity on residuals with
        # bandwidth chosen by grid search
        if self.replications is not None and "kde" in self.type_pi:
            if self.verbose > 0:
                print(f"\n Simulate residuals using {self.kernel} kernel... \n")
            assert self.kernel in (
                "gaussian",
                "tophat",
            ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
            kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
            grid = GridSearchCV(
                KernelDensity(kernel=self.kernel, **kwargs),
                param_grid=kernel_bandwidths,
            )
            grid.fit(self.residuals_)
            if self.verbose > 0:
                print(
                    f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
                )
            self.kde_ = grid.best_estimator_

        return self

    def predict(self, h=5, level=95, quantiles=None, **kwargs):
        """Override predict to handle vectorized model predictions.

        Temporarily replaces each per-series entry of ``fit_objs_`` with a
        wrapper that slices the i-th column out of the multi-output model's
        predictions, so the parent's per-series prediction loop works
        unchanged; the original entries are restored afterwards.
        """

        # Delegate to parent for quantiles and multiple levels
        if quantiles is not None or isinstance(level, (list, np.ndarray)):
            return super().predict(
                h=h, level=level, quantiles=quantiles, **kwargs
            )

        # Store original obj temporarily
        original_obj = self.obj

        # Create wrapper that extracts the i-th output for each series
        class VectorizedWrapper:
            def __init__(self, model, series_idx):
                self.model = model
                self.series_idx = series_idx

            def predict(self, X, **kw):
                """Predict and return only the output for this series index"""
                preds = self.model.predict(X, **kw)
                # preds shape: (n_samples, n_series) or (n_series,)
                if len(preds.shape) == 1:
                    # Single prediction: (n_series,)
                    return preds[self.series_idx: self.series_idx + 1]
                else:
                    # Multiple predictions: (n_samples, n_series)
                    return preds[
                        :, self.series_idx: self.series_idx + 1
                    ].flatten()

        # Wrap each series with its own index
        for i in range(self.init_n_series_):
            self.fit_objs_[i] = VectorizedWrapper(original_obj, i)

        try:
            result = super().predict(
                h=h, level=level, quantiles=quantiles, **kwargs
            )
        finally:
            # Restore original entries even if the parent predict raises
            for i in range(self.init_n_series_):
                self.fit_objs_[i] = original_obj

        return result
MTS subclass optimized for multivariate time series with vectorized models
Enforces n_series >= 2 and uses single vectorized fit call instead of per-series loop. Works with sklearn models supporting multi-output (Ridge, Lasso, LinearRegression, etc.)
21 def fit(self, X, xreg=None, **kwargs): 22 """Fit with vectorized multi-output model - requires n_series >= 2""" 23 24 # Validate multivariate input 25 try: 26 self.init_n_series_ = X.shape[1] 27 except IndexError: 28 raise ValueError( 29 "MultiOutputMTS requires multivariate input (n_samples, n_series)" 30 ) 31 32 if self.init_n_series_ < 2: 33 raise ValueError( 34 f"MultiOutputMTS requires at least 2 series, got {self.init_n_series_}" 35 ) 36 37 # Automatic lag selection if requested (copied from parent) 38 if isinstance(self.lags, str): 39 max_lags = min(25, X.shape[0] // 4) 40 best_ic = float("inf") 41 best_lags = 1 42 43 if self.verbose: 44 print( 45 f"\nSelecting optimal number of lags using {self.lags}..." 46 ) 47 iterator = tqdm(range(1, max_lags + 1)) 48 else: 49 iterator = range(1, max_lags + 1) 50 51 for lag in iterator: 52 if isinstance(X, pd.DataFrame): 53 X_values = X.values[::-1] 54 else: 55 X_values = X[::-1] 56 57 mts_input = ts.create_train_inputs(X_values, lag) 58 dummy_y, scaled_Z = self.cook_training_set( 59 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 60 ) 61 62 # Vectorized fit for lag selection 63 y_means = np.mean(mts_input[0], axis=0) 64 centered_y = mts_input[0] - y_means[np.newaxis, :] 65 self.obj.fit(X=scaled_Z, y=centered_y) 66 residuals = centered_y - self.obj.predict(scaled_Z) 67 self.residuals_ = residuals # Keep (n_obs, n_series) shape 68 69 ic = self._compute_information_criterion( 70 curr_lags=lag, criterion=self.lags 71 ) 72 73 if self.verbose: 74 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 75 76 if ic < best_ic: 77 best_ic = ic 78 best_lags = lag 79 80 if self.verbose: 81 print( 82 f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}" 83 ) 84 85 self.lags = best_lags 86 87 # Data preprocessing (from parent) 88 self.input_dates = None 89 self.df_ = None 90 91 if isinstance(X, pd.DataFrame) is False: 92 if xreg is None: 93 X = pd.DataFrame(X) 94 self.series_names = [ 95 "series" + str(i) for i in 
range(X.shape[1]) 96 ] 97 else: 98 X = mo.cbind(X, xreg) 99 self.xreg_ = xreg 100 else: 101 X_index = None 102 if X.index is not None: 103 X_index = X.index 104 if xreg is None: 105 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 106 else: 107 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 108 self.xreg_ = xreg 109 if X_index is not None: 110 X.index = X_index 111 self.series_names = X.columns.tolist() 112 113 if isinstance(X, pd.DataFrame): 114 if self.df_ is None: 115 self.df_ = X 116 X = X.values 117 else: 118 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 119 frequency = pd.infer_freq(input_dates_prev) 120 self.df_ = pd.concat([self.df_, X], axis=0) 121 self.input_dates = pd.date_range( 122 start=input_dates_prev[0], 123 periods=len(input_dates_prev) + X.shape[0], 124 freq=frequency, 125 ).values.tolist() 126 self.df_.index = self.input_dates 127 X = self.df_.values 128 self.df_.columns = self.series_names 129 else: 130 if self.df_ is None: 131 self.df_ = pd.DataFrame(X, columns=self.series_names) 132 else: 133 self.df_ = pd.concat( 134 [self.df_, pd.DataFrame(X, columns=self.series_names)], 135 axis=0, 136 ) 137 138 self.input_dates = ts.compute_input_dates(self.df_) 139 140 n, p = X.shape 141 self.n_obs_ = n 142 rep_1_n = np.repeat(1, n) 143 144 self.y_ = None 145 self.X_ = None 146 self.n_series = p 147 self.fit_objs_.clear() 148 self.y_means_.clear() 149 self.residuals_ = None 150 self.residuals_sims_ = None 151 self.kde_ = None 152 self.sims_ = None 153 self.scaled_Z_ = None 154 self.centered_y_is_ = [] 155 156 # Create training inputs 157 mts_input = ts.create_train_inputs(X[::-1], self.lags) 158 self.y_ = mts_input[0] 159 self.X_ = mts_input[1] 160 161 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 162 self.scaled_Z_ = scaled_Z 163 164 if self.verbose > 0: 165 print( 166 f"\n Adjusting {type(self.obj).__name__} to multivariate time series (vectorized)... 
\n" 167 ) 168 169 # VECTORIZED FITTING - NO LOOP 170 y_means_array = np.array( 171 [np.mean(self.y_[:, i]) for i in range(self.init_n_series_)] 172 ) 173 for i in range(self.init_n_series_): 174 self.y_means_[i] = y_means_array[i] 175 176 centered_y_all = self.y_ - y_means_array[np.newaxis, :] 177 self.centered_y_is_ = [ 178 centered_y_all[:, i] for i in range(self.init_n_series_) 179 ] 180 181 # Single vectorized fit for all series 182 self.obj.fit(scaled_Z, centered_y_all) 183 184 # All series share the same model 185 for i in range(self.init_n_series_): 186 self.fit_objs_[i] = self.obj 187 188 # Vectorized residuals - ONLY target columns (n_obs, n_series) 189 preds_all = self.obj.predict(scaled_Z) 190 residuals_raw = centered_y_all - preds_all 191 192 # CRITICAL: Ensure residuals only have n_series columns, not all scaled_Z columns 193 # In case there's some dimension mismatch, explicitly slice 194 self.residuals_ = residuals_raw[:, : self.init_n_series_] 195 196 # Handle type_pi 197 if self.type_pi == "gaussian": 198 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 199 200 if self.type_pi.startswith("scp2"): 201 data_mean = np.mean(self.residuals_, axis=0) 202 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 203 self.residuals_ = ( 204 self.residuals_ - data_mean[np.newaxis, :] 205 ) / self.residuals_std_dev_[np.newaxis, :] 206 207 if self.replications is not None and "kde" in self.type_pi: 208 if self.verbose > 0: 209 print(f"\n Simulate residuals using {self.kernel} kernel... 
\n") 210 assert self.kernel in ( 211 "gaussian", 212 "tophat", 213 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 214 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 215 grid = GridSearchCV( 216 KernelDensity(kernel=self.kernel, **kwargs), 217 param_grid=kernel_bandwidths, 218 ) 219 grid.fit(self.residuals_) 220 if self.verbose > 0: 221 print( 222 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 223 ) 224 self.kde_ = grid.best_estimator_ 225 226 return self
Fit with vectorized multi-output model - requires n_series >= 2
228 def predict(self, h=5, level=95, quantiles=None, **kwargs): 229 """Override predict to handle vectorized model predictions""" 230 231 # Delegate to parent for quantiles and multiple levels 232 if quantiles is not None or isinstance(level, (list, np.ndarray)): 233 return super().predict( 234 h=h, level=level, quantiles=quantiles, **kwargs 235 ) 236 237 # Store original obj temporarily 238 original_obj = self.obj 239 240 # Create wrapper that extracts the i-th output for each series 241 class VectorizedWrapper: 242 def __init__(self, model, series_idx): 243 self.model = model 244 self.series_idx = series_idx 245 246 def predict(self, X, **kw): 247 """Predict and return only the output for this series index""" 248 preds = self.model.predict(X, **kw) 249 # preds shape: (n_samples, n_series) or (n_series,) 250 if len(preds.shape) == 1: 251 # Single prediction: (n_series,) 252 return preds[self.series_idx: self.series_idx + 1] 253 else: 254 # Multiple predictions: (n_samples, n_series) 255 return preds[ 256 :, self.series_idx: self.series_idx + 1 257 ].flatten() 258 259 # Wrap each series with its own index 260 for i in range(self.init_n_series_): 261 self.fit_objs_[i] = VectorizedWrapper(original_obj, i) 262 263 try: 264 result = super().predict( 265 h=h, level=level, quantiles=quantiles, **kwargs 266 ) 267 finally: 268 # Restore original 269 for i in range(self.init_n_series_): 270 self.fit_objs_[i] = original_obj 271 272 return result
Override predict to handle vectorized model predictions
class MultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        n_classes_: int
            number of classes for the classifier

    Examples:

        See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)

        ```python
        import nnetsauce as ns
        import numpy as np
        from sklearn.datasets import load_breast_cancer
        from sklearn.linear_model import LinearRegression
        from sklearn.model_selection import train_test_split
        from sklearn import metrics
        from time import time

        breast_cancer = load_breast_cancer()
        Z = breast_cancer.data
        t = breast_cancer.target

        X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                            random_state=123+2*10)

        # Linear Regression is used
        regr = LinearRegression()
        fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                         n_clusters=2, type_clust="gmm")

        start = time()
        fit_obj.fit(X_train, y_train)
        print(f"Elapsed {time() - start}")

        print(fit_obj.score(X_test, y_test))
        print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

        start = time()
        preds = fit_obj.predict(X_test)
        print(f"Elapsed {time() - start}")
        print(metrics.classification_report(preds, y_test))
        ```

    """

    # construct the object -----
    # NOTE(review): this class attribute is shadowed by the `_estimator_type`
    # property defined at the bottom of the class, so the assignment is dead
    # code — verify which one sklearn's tooling is meant to see
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.obj = obj
        # one fitted regressor per class, keyed by class index
        self.fit_objs_ = {}

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit MultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, optional
                NOTE(review): accepted for sklearn API compatibility but
                currently unused (see commented-out branch below).

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # multitask response: one-vs-rest one-hot columns, one regression per class
        Y = mo.one_hot_encode2(output_y, self.n_classes_)

        # if sample_weight is None:
        # deepcopy is required because self.obj.fit returns self.obj itself;
        # without it every entry of fit_objs_ would alias the last fit
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(
                self.obj.fit(scaled_Z, Y[:, i], **kwargs)
            )

        # NOTE(review): duplicate of the assignment above; harmless but redundant
        self.classes_ = np.unique(y)
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}

        """
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        shape_X = X.shape

        probs = np.zeros((shape_X[0], self.n_classes_))

        if len(shape_X) == 1:
            # single observation: pad with a row of ones so cook_test_set
            # receives a 2-D input, then keep only the first row's predictions
            n_features = shape_X[0]

            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

        else:
            Z = self.cook_test_set(X, **kwargs)

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        # squash raw regression outputs through the sigmoid, then
        # row-normalize so each row sums to 1
        expit_raw_probs = expit(probs)

        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
                Decision function of the input samples. The order of outputs is the same
                as that of the classes passed to fit.
        """
        # NOTE(review): when self.obj does expose decision_function, the calls
        # below use self.obj — which after fit() is the estimator refit on the
        # LAST class column — rather than the per-class copies in fit_objs_.
        # Since obj is documented as a regression model, the predict_proba
        # fallback is the usual path; confirm the direct branch is intended.
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
171 def fit(self, X, y, sample_weight=None, **kwargs): 172 """Fit MultitaskClassifier to training data (X, y). 173 174 Args: 175 176 X: {array-like}, shape = [n_samples, n_features] 177 Training vectors, where n_samples is the number 178 of samples and n_features is the number of features. 179 180 y: array-like, shape = [n_samples] 181 Target values. 182 183 **kwargs: additional parameters to be passed to 184 self.cook_training_set or self.obj.fit 185 186 Returns: 187 188 self: object 189 190 """ 191 192 assert mx.is_factor(y), "y must contain only integers" 193 194 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 195 196 self.classes_ = np.unique(y) # for compatibility with sklearn 197 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 198 199 # multitask response 200 Y = mo.one_hot_encode2(output_y, self.n_classes_) 201 202 # if sample_weight is None: 203 for i in range(self.n_classes_): 204 self.fit_objs_[i] = deepcopy( 205 self.obj.fit(scaled_Z, Y[:, i], **kwargs) 206 ) 207 208 self.classes_ = np.unique(y) 209 return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
230 def predict_proba(self, X, **kwargs): 231 """Predict probabilities for test data X. 232 233 Args: 234 235 X: {array-like}, shape = [n_samples, n_features] 236 Training vectors, where n_samples is the number 237 of samples and n_features is the number of features. 238 239 **kwargs: additional parameters to be passed to 240 self.cook_test_set 241 242 Returns: 243 244 probability estimates for test data: {array-like} 245 246 """ 247 248 shape_X = X.shape 249 250 probs = np.zeros((shape_X[0], self.n_classes_)) 251 252 if len(shape_X) == 1: 253 n_features = shape_X[0] 254 255 new_X = mo.rbind( 256 X.reshape(1, n_features), 257 np.ones(n_features).reshape(1, n_features), 258 ) 259 260 Z = self.cook_test_set(new_X, **kwargs) 261 262 # loop on all the classes 263 for i in range(self.n_classes_): 264 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 265 266 else: 267 Z = self.cook_test_set(X, **kwargs) 268 269 # loop on all the classes 270 for i in range(self.n_classes_): 271 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 272 273 expit_raw_probs = expit(probs) 274 275 return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        weights : list
            The weights of the model.
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Checks that each (W, b) pair chains correctly from `input_dim`
        through the layers and that the final output dimension is 1.
        Returns False for empty/None weights, True when valid; raises
        ValueError on any dimension mismatch or malformed structure.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError):
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            )

    def fit(self, X, y):
        """Fit the network by full-batch gradient descent.

        Standardizes X, centers y, then runs `max_iter` steps of per-example
        gradient computation (vmap) averaged across the batch.

        Args:
            X: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,).

        Returns:
            self
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is 2D for consistency
        y = y.reshape(-1, 1)
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_
        # Validate or initialize weights
        if self.weights is not None:
            if self._validate_weights(X.shape[1]):
                self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )
        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads
        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples.
            # FIX: jax.tree_map was deprecated (JAX 0.4.25) and removed in
            # later releases; jax.tree_util.tree_map is the stable API.
            grads = jax.tree_util.tree_map(
                lambda g: jnp.mean(g, axis=0), grads
            )
            # Update parameters (plain SGD step)
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]
        # Store final weights
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model."""
        if self.weights is None:
            raise ValueError(
                "No weights available. Model has not been fitted yet."
            )
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually."""
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict targets for X.

        Applies the stored scaler, runs the forward pass, and adds back the
        training-target mean. Raises ValueError if called before fit.
        """
        X = self.scaler_.transform(X)
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        # NOTE(review): dropout is passed at inference time here — confirm
        # predict_internal disables it or that this is intentional MC dropout
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
225 def fit(self, X, y): 226 # Standardize the input features 227 X = self.scaler_.fit_transform(X) 228 # Ensure y is 2D for consistency 229 y = y.reshape(-1, 1) 230 self.y_mean_ = jnp.mean(y) 231 y = y - self.y_mean_ 232 # Validate or initialize weights 233 if self.weights is not None: 234 if self._validate_weights(X.shape[1]): 235 self.params = self.weights 236 else: 237 if self.hidden_layer_sizes is None: 238 raise ValueError( 239 "Either weights or hidden_layer_sizes must be provided" 240 ) 241 self.params = initialize_params( 242 X.shape[1], self.hidden_layer_sizes, self.random_state 243 ) 244 loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha) 245 grad_loss = jit(grad(loss_fn)) # compiled gradient evaluation function 246 perex_grads = jit( 247 vmap(grad_loss, in_axes=(None, 0, 0)) 248 ) # fast per-example grads 249 # Training loop 250 for _ in range(self.max_iter): 251 grads = perex_grads(self.params, X, y) 252 # Average gradients across examples 253 grads = jax.tree_map(lambda g: jnp.mean(g, axis=0), grads) 254 # Update parameters 255 self.params = [ 256 (W - self.learning_rate * dW, b - self.learning_rate * db) 257 for (W, b), (dW, db) in zip(self.params, grads) 258 ] 259 # Store final weights 260 self.weights = self.params 261 return self
276 def predict(self, X): 277 X = self.scaler_.transform(X) 278 if self.params is None: 279 raise ValueError("Model has not been fitted yet.") 280 predictions = predict_internal( 281 self.params, 282 X, 283 activation_func=self.activation_name, 284 dropout=self.dropout, 285 seed=self.random_state, 286 ) 287 return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
    """
    (Pretrained) Neural Network Classifier.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.

        max_iter : int, default=100
            The maximum number of iterations to train the model.

        learning_rate : float, default=0.01
            The learning rate for the optimizer.

        l1_ratio : float, default=0.5
            The ratio of L1 regularization.

        alpha : float, default=1e-6
            The regularization parameter.

        activation_name : str, default="relu"
            The activation function to use.

        dropout : float, default=0.0
            The dropout rate.

        random_state : int, default=None
            The random state for the random number generator.

        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        weights : list
            The weights of the model.

        params : list
            The parameters of the model.

        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.

        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        predict_proba(X)
            Predict the probability of the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    # FIX: the duplicate `_estimator_type = "classifier"` class attribute
    # was removed — it was shadowed by the property of the same name below,
    # which yields the identical value.

    def __init__(
        self,
        hidden_layer_sizes=(100,),
        max_iter=100,
        learning_rate=0.01,
        weights=None,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0.0,
        random_state=None,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.random_state = random_state
        self.regr = None  # fitted SimpleMultitaskClassifier wrapper

    def fit(self, X, y):
        """Fit the model to the data.

        Classification is delegated to a SimpleMultitaskClassifier wrapped
        around a NeuralNetRegressor built from this estimator's settings.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.
        """
        regressor = NeuralNetRegressor(
            hidden_layer_sizes=self.hidden_layer_sizes,
            max_iter=self.max_iter,
            learning_rate=self.learning_rate,
            weights=self.weights,
            l1_ratio=self.l1_ratio,
            alpha=self.alpha,
            activation_name=self.activation_name,
            dropout=self.dropout,
            random_state=self.random_state,
        )
        self.regr = SimpleMultitaskClassifier(regressor)
        self.regr.fit(X, y)
        # FIX: deduplicated the fitted-attribute assignments — n_outputs_,
        # n_features_in_ and n_features_out_ were each set 2-3 times with
        # the same values.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.n_tasks_ = 1
        self.n_features_in_ = X.shape[1]
        self.n_features_out_ = 1
        self.n_outputs_ = 1
        self.n_samples_fit_ = X.shape[0]
        self.n_samples_test_ = X.shape[0]
        return self

    def predict_proba(self, X):
        """Predict the probability of the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        return self.regr.predict_proba(X)

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        return self.regr.predict(X)

    @property
    def _estimator_type(self):
        # Tells scikit-learn tooling this estimator is a classifier.
        return "classifier"
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
85 def fit(self, X, y): 86 """Fit the model to the data. 87 88 Parameters: 89 90 X: {array-like}, shape = [n_samples, n_features] 91 Training vectors, where n_samples is the number of samples and 92 n_features is the number of features. 93 y: array-like, shape = [n_samples] 94 Target values. 95 """ 96 regressor = NeuralNetRegressor( 97 hidden_layer_sizes=self.hidden_layer_sizes, 98 max_iter=self.max_iter, 99 learning_rate=self.learning_rate, 100 weights=self.weights, 101 l1_ratio=self.l1_ratio, 102 alpha=self.alpha, 103 activation_name=self.activation_name, 104 dropout=self.dropout, 105 random_state=self.random_state, 106 ) 107 self.regr = SimpleMultitaskClassifier(regressor) 108 self.regr.fit(X, y) 109 self.classes_ = np.unique(y) 110 self.n_classes_ = len(self.classes_) 111 self.n_tasks_ = 1 112 self.n_features_in_ = X.shape[1] 113 self.n_outputs_ = 1 114 self.n_samples_fit_ = X.shape[0] 115 self.n_samples_test_ = X.shape[0] 116 self.n_features_out_ = 1 117 self.n_outputs_ = 1 118 self.n_features_in_ = X.shape[1] 119 self.n_features_out_ = 1 120 self.n_outputs_ = 1 121 return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
123 def predict_proba(self, X): 124 """Predict the probability of the target variable. 125 126 Parameters: 127 128 X: {array-like}, shape = [n_samples, n_features] 129 Training vectors, where n_samples is the number of samples and 130 n_features is the number of features. 131 """ 132 return self.regr.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
134 def predict(self, X): 135 """Predict the target variable. 136 137 Parameters: 138 139 X: {array-like}, shape = [n_samples, n_features] 140 Training vectors, where n_samples is the number of samples and 141 n_features is the number of features. 142 """ 143 return self.regr.predict(X)
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            fitted object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals. Default is 95,
            equivalent to a miscoverage error of 5 (%)

        replications: an integer;
            Number of replications for simulated conformal (default is `None`)

        type_pi: a string;
            type of prediction interval: currently `None`
            (split conformal without simulation)
            for type_pi in:
            - 'bootstrap': Bootstrap resampling.
            - 'kde': Kernel Density Estimation.

        type_split: a string;
            "random" (random split of data) or "sequential" (sequential split of data)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi=None,
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        # Miscoverage rate as a fraction, e.g. level=95 -> alpha_=0.05.
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None
        self.aic_ = None
        self.aicc_ = None
        self.bic_ = None
        self.sse_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

        """

        # Split into a fitting half and a calibration half.
        if self.type_split == "random":
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
        elif self.type_split == "sequential":
            n_x = X.shape[0]
            n_x_half = n_x // 2
            first_half_idx = range(0, n_x_half)
            second_half_idx = range(n_x_half, n_x)
            X_train = X[first_half_idx, :]
            X_calibration = X[second_half_idx, :]
            y_train = y[first_half_idx]
            y_calibration = y[second_half_idx]
        else:
            # FIX: an unknown type_split previously fell through and raised
            # a confusing NameError on X_train below.
            raise ValueError(
                "`type_split` must be in ('random', 'sequential')"
            )

        if self.method == "splitconformal":
            self.obj.fit(X_train, y_train)
            preds_calibration = self.obj.predict(X_calibration)
            self.calibrated_residuals_ = y_calibration - preds_calibration
            absolute_residuals = np.abs(self.calibrated_residuals_)
            self.calibrated_residuals_scaler_ = StandardScaler(
                with_mean=True, with_std=True
            )
            self.scaled_calibrated_residuals_ = (
                self.calibrated_residuals_scaler_.fit_transform(
                    self.calibrated_residuals_.reshape(-1, 1)
                ).ravel()
            )
            try:
                # numpy version >= 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals, q=self.level / 100, method="higher"
                )
            except Exception:
                # numpy version < 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals,
                    q=self.level / 100,
                    interpolation="higher",
                )

        if self.method == "localconformal":
            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(
                self.obj, mad_estimator, AbsErrorErrFunc()
            )
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

            # Store calibration residuals from the ICP scorer so that
            # simulation-based prediction intervals are available in predict().
            raw_residuals = self.icp_.nc_function.err_func.apply(
                self.icp_.nc_function.predict(X_calibration), y_calibration
            )
            self.calibrated_residuals_ = raw_residuals
            self.calibrated_residuals_scaler_ = StandardScaler(
                with_mean=True, with_std=True
            )
            self.scaled_calibrated_residuals_ = (
                self.calibrated_residuals_scaler_.fit_transform(
                    self.calibrated_residuals_.reshape(-1, 1)
                ).ravel()
            )

        # Information criteria on the calibration half.
        preds = self.obj.predict(X_calibration)
        self.sse_ = np.sum((y_calibration - preds) ** 2)

        # Rough parameter count: hidden features (if any) + input features.
        n_params = (
            getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1]
        )

        n_samples = len(y_calibration)
        temp = n_samples * np.log(self.sse_ / n_samples)
        self.aic_ = temp + 2 * n_params
        self.bic_ = temp + np.log(n_samples) * n_params

        return self

    def _simulate_from_residuals(self, pred, n_obs):
        """Shared helper: draw `self.replications` simulations from calibrated
        residuals and return (sims, mean, lower, upper).

        Args:
            pred: 1-D array of point predictions, shape [n_obs].
            n_obs: int, number of test observations.

        Returns:
            sims_ : 2-D array, shape [n_obs, replications]
            mean_ : 1-D array, shape [n_obs]
            lower_ : 1-D array, shape [n_obs]
            upper_ : 1-D array, shape [n_obs]
        """
        # Apply documented defaults when only one of the two knobs is set.
        type_pi = self.type_pi if self.type_pi is not None else "kde"
        replications = (
            self.replications if self.replications is not None else 100
        )

        assert type_pi in (
            "bootstrap",
            "kde",
            "normal",
            "ecdf",
            "permutation",
            "smooth-bootstrap",
        ), (
            "`type_pi` must be in ('bootstrap', 'kde', 'normal', 'ecdf', "
            "'permutation', 'smooth-bootstrap')"
        )

        # Residuals were standardized at fit time; rescale simulations back.
        scale = self.calibrated_residuals_scaler_.scale_[0]

        if type_pi == "bootstrap":
            np.random.seed(self.seed)
            residuals_sims = np.asarray(
                [
                    np.random.choice(
                        a=self.scaled_calibrated_residuals_,
                        size=n_obs,
                    )
                    for _ in range(replications)
                ]
            ).T  # shape [n_obs, replications]

        elif type_pi == "kde":
            kde = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
            residuals_sims = np.asarray(
                [
                    kde.resample(size=n_obs, seed=self.seed + i).ravel()
                    for i in range(replications)
                ]
            ).T  # shape [n_obs, replications]

        else:  # normal / ecdf / permutation / smooth-bootstrap
            residuals_sims = np.asarray(
                simulate_replications(
                    data=self.scaled_calibrated_residuals_,
                    method=type_pi,
                    num_replications=replications,
                    n_obs=n_obs,
                    seed=self.seed,
                )
            ).T  # shape [n_obs, replications]

        sims = np.asarray(
            [
                pred + scale * residuals_sims[:, i].ravel()
                for i in range(replications)
            ]
        ).T  # shape [n_obs, replications]

        mean_ = np.mean(sims, axis=1)
        # FIX: the band quantiles previously used alpha_/200 and
        # 1 - alpha_/200, although alpha_ is already a fraction
        # (1 - level/100); with level=95 that produced ~99.95% bands
        # instead of the requested 95%. A symmetric two-sided band at
        # miscoverage alpha_ uses alpha_/2 per tail.
        lower_ = np.quantile(sims, q=self.alpha_ / 2, axis=1)
        upper_ = np.quantile(sims, q=1 - self.alpha_ / 2, axis=1)

        return sims, mean_, lower_, upper_

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned.

        """

        if self.method == "splitconformal":
            pred = self.obj.predict(X)

        if self.method == "localconformal":
            pred = self.icp_.predict(X)

        # ------------------------------------------------------------------ #
        # splitconformal
        # ------------------------------------------------------------------ #
        if self.method == "splitconformal":
            if self.replications is None and self.type_pi is None:
                # Plain split-conformal: symmetric quantile band
                if return_pi:
                    DescribeResult = namedtuple(
                        "DescribeResult", ("mean", "lower", "upper")
                    )
                    return DescribeResult(
                        pred, pred - self.quantile_, pred + self.quantile_
                    )
                else:
                    return pred

            else:
                # Simulation-based prediction intervals for splitconformal,
                # via the shared helper _simulate_from_residuals().
                if self.type_pi is None:
                    warnings.warn(
                        "type_pi must be set when replications is not None; "
                        "defaulting to 'kde'."
                    )
                if self.replications is None:
                    warnings.warn(
                        "replications must be set when type_pi is not None; "
                        "defaulting to 100."
                    )

                (
                    self.sims_,
                    self.mean_,
                    self.lower_,
                    self.upper_,
                ) = self._simulate_from_residuals(pred, X.shape[0])

                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "sims", "lower", "upper")
                )
                return DescribeResult(
                    self.mean_, self.sims_, self.lower_, self.upper_
                )

        # ------------------------------------------------------------------ #
        # localconformal
        # ------------------------------------------------------------------ #
        if self.method == "localconformal":
            if self.replications is None:
                if return_pi:
                    # FIX: significance was previously `1 - self.level`
                    # (e.g. -94 for level=95). The ICP `significance`
                    # argument is the miscoverage rate as a fraction,
                    # which is exactly self.alpha_ (= 1 - level/100).
                    predictions_bounds = self.icp_.predict(
                        X, significance=self.alpha_
                    )
                    DescribeResult = namedtuple(
                        "DescribeResult", ("mean", "lower", "upper")
                    )
                    return DescribeResult(
                        pred,
                        predictions_bounds[:, 0],
                        predictions_bounds[:, 1],
                    )
                else:
                    return pred

            else:
                # Simulation-based prediction intervals for localconformal,
                # reusing the calibration residuals stored during fit().
                if self.type_pi is None:
                    warnings.warn(
                        "type_pi must be set when replications is not None; "
                        "defaulting to 'kde'."
                    )

                (
                    self.sims_,
                    self.mean_,
                    self.lower_,
                    self.upper_,
                ) = self._simulate_from_residuals(pred, X.shape[0])

                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "sims", "lower", "upper")
                )
                return DescribeResult(
                    self.mean_, self.sims_, self.lower_, self.upper_
                )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`)
type_pi: a string;
type of prediction interval: currently `None`
(split conformal without simulation)
for type_pi in:
- 'bootstrap': Bootstrap resampling.
- 'kde': Kernel Density Estimation.
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
87 def fit(self, X, y, sample_weight=None, **kwargs): 88 """Fit the `method` to training data (X, y). 89 90 Args: 91 92 X: array-like, shape = [n_samples, n_features]; 93 Training set vectors, where n_samples is the number 94 of samples and n_features is the number of features. 95 96 y: array-like, shape = [n_samples, ]; Target values. 97 98 sample_weight: array-like, shape = [n_samples] 99 Sample weights. 100 101 """ 102 103 if self.type_split == "random": 104 X_train, X_calibration, y_train, y_calibration = train_test_split( 105 X, y, test_size=0.5, random_state=self.seed 106 ) 107 108 elif self.type_split == "sequential": 109 n_x = X.shape[0] 110 n_x_half = n_x // 2 111 first_half_idx = range(0, n_x_half) 112 second_half_idx = range(n_x_half, n_x) 113 X_train = X[first_half_idx, :] 114 X_calibration = X[second_half_idx, :] 115 y_train = y[first_half_idx] 116 y_calibration = y[second_half_idx] 117 118 if self.method == "splitconformal": 119 self.obj.fit(X_train, y_train) 120 preds_calibration = self.obj.predict(X_calibration) 121 self.calibrated_residuals_ = y_calibration - preds_calibration 122 absolute_residuals = np.abs(self.calibrated_residuals_) 123 self.calibrated_residuals_scaler_ = StandardScaler( 124 with_mean=True, with_std=True 125 ) 126 self.scaled_calibrated_residuals_ = ( 127 self.calibrated_residuals_scaler_.fit_transform( 128 self.calibrated_residuals_.reshape(-1, 1) 129 ).ravel() 130 ) 131 try: 132 # numpy version >= 1.22 133 self.quantile_ = np.quantile( 134 a=absolute_residuals, q=self.level / 100, method="higher" 135 ) 136 except Exception: 137 # numpy version < 1.22 138 self.quantile_ = np.quantile( 139 a=absolute_residuals, 140 q=self.level / 100, 141 interpolation="higher", 142 ) 143 144 if self.method == "localconformal": 145 mad_estimator = ExtraTreesRegressor() 146 normalizer = RegressorNormalizer( 147 self.obj, mad_estimator, AbsErrorErrFunc() 148 ) 149 nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer) 150 self.icp_ = 
IcpRegressor(nc) 151 self.icp_.fit(X_train, y_train) 152 self.icp_.calibrate(X_calibration, y_calibration) 153 154 # FIX: Store calibration residuals from the ICP scorer so that 155 # simulation-based prediction intervals are available in predict(). 156 raw_residuals = self.icp_.nc_function.err_func.apply( 157 self.icp_.nc_function.predict(X_calibration), y_calibration 158 ) 159 self.calibrated_residuals_ = raw_residuals 160 self.calibrated_residuals_scaler_ = StandardScaler( 161 with_mean=True, with_std=True 162 ) 163 self.scaled_calibrated_residuals_ = ( 164 self.calibrated_residuals_scaler_.fit_transform( 165 self.calibrated_residuals_.reshape(-1, 1) 166 ).ravel() 167 ) 168 169 # Calculate AIC 170 # Get predictions 171 preds = self.obj.predict(X_calibration) 172 173 # Calculate SSE 174 self.sse_ = np.sum((y_calibration - preds) ** 2) 175 176 # Get number of parameters from the base model 177 n_params = ( 178 getattr(self.obj, "n_hidden_features", 0) + X_calibration.shape[1] 179 ) 180 181 # Calculate AIC 182 n_samples = len(y_calibration) 183 temp = n_samples * np.log(self.sse_ / n_samples) 184 self.aic_ = temp + 2 * n_params 185 self.bic_ = temp + np.log(n_samples) * n_params 186 187 return self
Fit the `method` to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
267 def predict(self, X, return_pi=False): 268 """Obtain predictions and prediction intervals 269 270 Args: 271 272 X: array-like, shape = [n_samples, n_features]; 273 Testing set vectors, where n_samples is the number 274 of samples and n_features is the number of features. 275 276 return_pi: boolean 277 Whether the prediction interval is returned or not. 278 Default is False, for compatibility with other _estimators_. 279 If True, a tuple containing the predictions + lower and upper 280 bounds is returned. 281 282 """ 283 284 if self.method == "splitconformal": 285 pred = self.obj.predict(X) 286 287 if self.method == "localconformal": 288 pred = self.icp_.predict(X) 289 290 # ------------------------------------------------------------------ # 291 # splitconformal 292 # ------------------------------------------------------------------ # 293 if self.method == "splitconformal": 294 if self.replications is None and self.type_pi is None: 295 # Plain split-conformal: symmetric quantile band 296 if return_pi: 297 DescribeResult = namedtuple( 298 "DescribeResult", ("mean", "lower", "upper") 299 ) 300 return DescribeResult( 301 pred, pred - self.quantile_, pred + self.quantile_ 302 ) 303 else: 304 return pred 305 306 else: 307 # FIX: simulation-based prediction intervals for splitconformal. 308 # Previously this branch raised NotImplementedError even though 309 # all the necessary logic was present — it was simply unreachable 310 # because the raise fired unconditionally. The code has been 311 # moved into _simulate_from_residuals() and called here. 312 313 if self.type_pi is None: 314 warnings.warn( 315 "type_pi must be set when replications is not None; " 316 "defaulting to 'kde'." 317 ) 318 if self.replications is None: 319 warnings.warn( 320 "replications must be set when type_pi is not None; " 321 "defaulting to 100." 
322 ) 323 324 ( 325 self.sims_, 326 self.mean_, 327 self.lower_, 328 self.upper_, 329 ) = self._simulate_from_residuals(pred, X.shape[0]) 330 331 DescribeResult = namedtuple( 332 "DescribeResult", ("mean", "sims", "lower", "upper") 333 ) 334 return DescribeResult( 335 self.mean_, self.sims_, self.lower_, self.upper_ 336 ) 337 338 # ------------------------------------------------------------------ # 339 # localconformal 340 # ------------------------------------------------------------------ # 341 if self.method == "localconformal": 342 if self.replications is None: 343 if return_pi: 344 predictions_bounds = self.icp_.predict( 345 X, significance=1 - self.level 346 ) 347 DescribeResult = namedtuple( 348 "DescribeResult", ("mean", "lower", "upper") 349 ) 350 return DescribeResult( 351 pred, 352 predictions_bounds[:, 0], 353 predictions_bounds[:, 1], 354 ) 355 else: 356 return pred 357 358 else: 359 # FIX: simulation-based prediction intervals for localconformal. 360 # Previously this always raised NotImplementedError. Now we 361 # reuse the calibration residuals stored during fit() and apply 362 # the same simulation logic used by splitconformal via the 363 # shared helper _simulate_from_residuals(). 364 365 if self.type_pi is None: 366 warnings.warn( 367 "type_pi must be set when replications is not None; " 368 "defaulting to 'kde'." 369 ) 370 371 ( 372 self.sims_, 373 self.mean_, 374 self.lower_, 375 self.upper_, 376 ) = self._simulate_from_residuals(pred, X.shape[0]) 377 378 DescribeResult = namedtuple( 379 "DescribeResult", ("mean", "sims", "lower", "upper") 380 ) 381 return DescribeResult( 382 self.mean_, self.sims_, self.lower_, self.upper_ 383 )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
class PredictionSet(BaseEstimator, ClassifierMixin):
    """Class PredictionSet: Obtain prediction sets.

    Attributes:

        obj: an object;
            fitted object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction sets.
            Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)

        level: a float;
            Confidence level for prediction sets. Default is None,
            95 is equivalent to a miscoverage error of 5 (%)

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="icp",
        level=None,
        seed=123,
    ):
        self.obj = obj
        self.method = method
        self.level = level
        self.seed = seed
        # FIX: alpha_ was only assigned when level was provided, so
        # predict() raised AttributeError under the documented default
        # level=None. With significance=None the underlying conformal
        # predictor returns p-values instead of a fixed-level region.
        if self.level is not None:
            self.alpha_ = 1 - self.level / 100
        else:
            self.alpha_ = None
        self.quantile_ = None
        self.icp_ = None
        self.tcp_ = None

        if self.method == "icp":
            self.icp_ = IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        elif self.method == "tcp":
            self.tcp_ = TcpClassifier(
                ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
            )
        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

        """
        if self.method == "icp":
            # Inductive conformal: fit on one half, calibrate on the other.
            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        elif self.method == "tcp":
            # Transductive conformal: no split needed.
            self.tcp_.fit(X, y)

        return self

    def predict(self, X, **kwargs):
        """Obtain predictions and prediction sets

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

        """

        if self.method == "icp":
            return self.icp_.predict(X, significance=self.alpha_, **kwargs)

        elif self.method == "tcp":
            return self.tcp_.predict(X, significance=self.alpha_, **kwargs)

        else:
            raise ValueError("`self.method` must be in ('icp', 'tcp')")

    def predict_proba(self, X):
        """One-hot encode the predicted labels as pseudo-probabilities.

        NOTE(review): this assumes predict() returns 0-indexed integer
        class labels; with a `level` set, conformal predict() returns a
        boolean region matrix, for which this indexing is not meaningful —
        confirm intended usage.
        """
        predictions = self.predict(X)
        return np.eye(len(np.unique(predictions)))[predictions]
Class PredictionSet: Obtain prediction sets.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction sets.
Currently "icp" (default, inductive conformal) and "tcp" (transductive conformal)
level: a float;
Confidence level for prediction sets. Default is None,
95 is equivalent to a miscoverage error of 5 (%)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
68 def fit(self, X, y, sample_weight=None, **kwargs): 69 """Fit the `method` to training data (X, y). 70 71 Args: 72 73 X: array-like, shape = [n_samples, n_features]; 74 Training set vectors, where n_samples is the number 75 of samples and n_features is the number of features. 76 77 y: array-like, shape = [n_samples, ]; Target values. 78 79 sample_weight: array-like, shape = [n_samples] 80 Sample weights. 81 82 """ 83 if self.method == "icp": 84 X_train, X_calibration, y_train, y_calibration = train_test_split( 85 X, y, test_size=0.5, random_state=self.seed 86 ) 87 self.icp_.fit(X_train, y_train) 88 self.icp_.calibrate(X_calibration, y_calibration) 89 90 elif self.method == "tcp": 91 self.tcp_.fit(X, y) 92 93 return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
95 def predict(self, X, **kwargs): 96 """Obtain predictions and prediction sets 97 98 Args: 99 100 X: array-like, shape = [n_samples, n_features]; 101 Testing set vectors, where n_samples is the number 102 of samples and n_features is the number of features. 103 104 """ 105 106 if self.method == "icp": 107 return self.icp_.predict(X, significance=self.alpha_, **kwargs) 108 109 elif self.method == "tcp": 110 return self.tcp_.predict(X, significance=self.alpha_, **kwargs) 111 112 else: 113 raise ValueError("`self.method` must be in ('icp', 'tcp')")
Obtain predictions and prediction sets
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
class SimpleMultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

        seed: int
            reproducibility seed

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        n_classes_: int
            number of classes for the classifier

    Examples:

        ```python
        import nnetsauce as ns
        import numpy as np
        from sklearn.datasets import load_breast_cancer
        from sklearn.linear_model import LinearRegression
        from sklearn.model_selection import train_test_split
        from sklearn import metrics
        from time import time

        breast_cancer = load_breast_cancer()
        Z = breast_cancer.data
        t = breast_cancer.target

        X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                            random_state=123+2*10)

        # Linear Regression is used
        regr = LinearRegression()
        fit_obj = ns.SimpleMultitaskClassifier(regr)

        start = time()
        fit_obj.fit(X_train, y_train)
        print(f"Elapsed {time() - start}")

        print(fit_obj.score(X_test, y_test))
        print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

        start = time()
        preds = fit_obj.predict(X_test)
        print(f"Elapsed {time() - start}")
        print(metrics.classification_report(preds, y_test))
        ```

    """

    # construct the object -----
    # NOTE(review): this class attribute is shadowed by the `_estimator_type`
    # property declared at the bottom of the class; both say "classifier".
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
    ):
        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}  # fallback: one fitted regressor per class
        self.multioutput_model_ = None  # preferred: single MultiOutputRegressor
        self.X_scaler_ = StandardScaler()
        self.scaled_X_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit SimpleMultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.scaled_X_ = self.X_scaler_.fit_transform(X)

        # multitask response (one 0/1 column per class)
        Y = mo.one_hot_encode2(y, self.n_classes_)

        # Try MultiOutputRegressor first (more efficient)
        try:
            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
            try:
                self.multioutput_model_.fit(
                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
                )
            except TypeError:
                # If sample_weight not supported, try without it
                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
        except Exception:
            # Fallback: fit separate models for each class
            self.multioutput_model_ = None
            try:
                for i in range(self.n_classes_):
                    # `self.obj.fit` refits the shared estimator in place; the
                    # deepcopy taken immediately after snapshots that fitted
                    # state before the next class overwrites it.
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(
                            self.scaled_X_,
                            Y[:, i],
                            sample_weight=sample_weight,
                            **kwargs
                        )
                    )
            except TypeError:
                # sample_weight not supported: refit every class without it
                for i in range(self.n_classes_):
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
                    )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters

        Returns:

            model predictions: {array-like}

        """
        # highest normalized score wins
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters

        Returns:

            probability estimates for test data: {array-like}

        """

        shape_X = X.shape

        if self.multioutput_model_ is not None:
            # Use MultiOutputRegressor for prediction
            if len(shape_X) == 1:  # one example
                # pad with a dummy row of ones so the scaler gets a 2D input;
                # only the first output row is kept
                n_features = shape_X[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )
                Z = self.X_scaler_.transform(new_X, **kwargs)
                probs = self.multioutput_model_.predict(Z, **kwargs)[:1, :]
            else:  # multiple rows
                Z = self.X_scaler_.transform(X, **kwargs)
                probs = self.multioutput_model_.predict(Z, **kwargs)
        else:
            # Use separate models for each class
            probs = np.zeros((shape_X[0], self.n_classes_))

            if len(shape_X) == 1:  # one example
                n_features = shape_X[0]

                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )

                Z = self.X_scaler_.transform(new_X, **kwargs)

                # Fallback to standard model
                for i in range(self.n_classes_):
                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

            else:  # multiple rows
                Z = self.X_scaler_.transform(X, **kwargs)

                # Fallback to standard model
                for i in range(self.n_classes_):
                    probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        # squash raw regression outputs to (0, 1), then normalize rows to sum to 1
        expit_raw_probs = expit(probs)

        # Add small epsilon to avoid division by zero
        row_sums = expit_raw_probs.sum(axis=1)[:, None]
        row_sums[row_sums < 1e-10] = 1e-10

        return expit_raw_probs / row_sums

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:
            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:
            array-like of shape (n_samples,) or (n_samples, n_classes)
                Decision function of the input samples. The order of outputs is the same
                as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        if len(X.shape) == 1:
            # single example: pad with a dummy row, keep only the first result
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.obj.decision_function(
                    self.cook_test_set(new_X, **kwargs), **kwargs
                )
            )[0]

        return self.obj.decision_function(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    @property
    def _estimator_type(self):
        return "classifier"
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit SimpleMultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.scaled_X_ = self.X_scaler_.fit_transform(X)

        # multitask response (one 0/1 column per class)
        Y = mo.one_hot_encode2(y, self.n_classes_)

        # Try MultiOutputRegressor first (more efficient)
        try:
            self.multioutput_model_ = MultiOutputRegressor(deepcopy(self.obj))
            try:
                self.multioutput_model_.fit(
                    self.scaled_X_, Y, sample_weight=sample_weight, **kwargs
                )
            except TypeError:
                # If sample_weight not supported, try without it
                self.multioutput_model_.fit(self.scaled_X_, Y, **kwargs)
        except Exception:
            # Fallback: fit separate models for each class
            self.multioutput_model_ = None
            try:
                for i in range(self.n_classes_):
                    # `self.obj.fit` refits the shared estimator in place; the
                    # deepcopy taken immediately after snapshots that fitted
                    # state before the next class overwrites it.
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(
                            self.scaled_X_,
                            Y[:, i],
                            sample_weight=sample_weight,
                            **kwargs
                        )
                    )
            except TypeError:
                # sample_weight not supported: refit every class without it
                for i in range(self.n_classes_):
                    self.fit_objs_[i] = deepcopy(
                        self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
                    )
        return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
151 def predict(self, X, **kwargs): 152 """Predict test data X. 153 154 Args: 155 156 X: {array-like}, shape = [n_samples, n_features] 157 Training vectors, where n_samples is the number 158 of samples and n_features is the number of features. 159 160 **kwargs: additional parameters 161 162 Returns: 163 164 model predictions: {array-like} 165 166 """ 167 return np.argmax(self.predict_proba(X, **kwargs), axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
169 def predict_proba(self, X, **kwargs): 170 """Predict probabilities for test data X. 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 **kwargs: additional parameters 179 180 Returns: 181 182 probability estimates for test data: {array-like} 183 184 """ 185 186 shape_X = X.shape 187 188 if self.multioutput_model_ is not None: 189 # Use MultiOutputRegressor for prediction 190 if len(shape_X) == 1: # one example 191 n_features = shape_X[0] 192 new_X = mo.rbind( 193 X.reshape(1, n_features), 194 np.ones(n_features).reshape(1, n_features), 195 ) 196 Z = self.X_scaler_.transform(new_X, **kwargs) 197 probs = self.multioutput_model_.predict(Z, **kwargs)[:1, :] 198 else: # multiple rows 199 Z = self.X_scaler_.transform(X, **kwargs) 200 probs = self.multioutput_model_.predict(Z, **kwargs) 201 else: 202 # Use separate models for each class 203 probs = np.zeros((shape_X[0], self.n_classes_)) 204 205 if len(shape_X) == 1: # one example 206 n_features = shape_X[0] 207 208 new_X = mo.rbind( 209 X.reshape(1, n_features), 210 np.ones(n_features).reshape(1, n_features), 211 ) 212 213 Z = self.X_scaler_.transform(new_X, **kwargs) 214 215 # Fallback to standard model 216 for i in range(self.n_classes_): 217 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0] 218 219 else: # multiple rows 220 Z = self.X_scaler_.transform(X, **kwargs) 221 222 # Fallback to standard model 223 for i in range(self.n_classes_): 224 probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs) 225 226 expit_raw_probs = expit(probs) 227 228 # Add small epsilon to avoid division by zero 229 row_sums = expit_raw_probs.sum(axis=1)[:, None] 230 row_sums[row_sums < 1e-10] = 1e-10 231 232 return expit_raw_probs / row_sums
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
class Optimizer:
    """Optimizer class

    Attributes:

        type_optim: str
            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
            or 'scd' (stochastic minibatch coordinate descent)

        num_iters: int
            number of iterations of the optimizer

        learning_rate: float
            step size

        batch_prop: float
            proportion of the initial data used at each optimization step

        learning_method: str
            "poly" - learning rate decreasing as a polynomial function
            of # of iterations (default)
            "exp" - learning rate decreasing as an exponential function
            of # of iterations
            "momentum" - gradient descent using momentum

        randomization: str
            type of randomization applied at each step
            "strat" - stratified subsampling (default)
            "shuffle" - random subsampling

        mass: float
            mass on velocity, for `method` == "momentum"

        decay: float
            coefficient of decrease of the learning rate for
            `method` == "poly" and `method` == "exp"

        tolerance: float
            early stopping parameter (convergence of loss function)

        verbose: int
            controls verbosity of gradient descent
            0 - nothing is printed
            1 - a progress bar is printed
            2 - successive loss function values are printed

    """

    # construct the object -----

    def __init__(
        self,
        type_optim="sgd",
        num_iters=100,
        learning_rate=0.01,
        batch_prop=1.0,
        learning_method="momentum",
        randomization="strat",
        mass=0.9,
        decay=0.1,
        tolerance=1e-3,
        verbose=1,
    ):
        self.type_optim = type_optim
        self.num_iters = num_iters
        self.learning_rate = learning_rate
        self.batch_prop = batch_prop
        self.learning_method = learning_method
        self.randomization = randomization
        self.mass = mass
        self.decay = decay
        self.tolerance = tolerance
        self.verbose = verbose
        self.opt = None

    def fit(self, loss_func, response, x0, q=None, **kwargs):
        """Run the selected stochastic optimizer on `loss_func`.

        Args:

            loss_func: loss function

            response: array-like, shape = [n_samples]
                target variable (used for subsampling)

            x0: array-like, shape = [n_features]
                initial value provided to the optimizer

            **kwargs: additional parameters to be passed to
                loss function

        Returns:

            self: object

        """

        # NOTE(review): if `type_optim` is neither 'scd' nor 'sgd', neither
        # branch runs and `self.results` is never set — silent no-op.
        if self.type_optim == "scd":
            self.results = scd(
                loss_func,
                response=response,
                x=x0,
                num_iters=self.num_iters,
                batch_prop=self.batch_prop,
                learning_rate=self.learning_rate,
                learning_method=self.learning_method,
                mass=self.mass,
                decay=self.decay,
                randomization=self.randomization,
                tolerance=self.tolerance,
                verbose=self.verbose,
                **kwargs
            )

        if self.type_optim == "sgd":
            self.results = sgd(
                loss_func,
                response=response,
                x=x0,
                num_iters=self.num_iters,
                batch_prop=self.batch_prop,
                learning_rate=self.learning_rate,
                learning_method=self.learning_method,
                mass=self.mass,
                decay=self.decay,
                randomization=self.randomization,
                tolerance=self.tolerance,
                verbose=self.verbose,
                **kwargs
            )

        return self

    def one_hot_encode(self, y, n_classes):
        # thin wrapper around the module-level one_hot_encode helper
        return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations (default)
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`method` == "poly" and `method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
84 def fit(self, loss_func, response, x0, q=None, **kwargs): 85 """Fit GLM model to training data (X, y). 86 87 Args: 88 89 loss_func: loss function 90 91 response: array-like, shape = [n_samples] 92 target variable (used for subsampling) 93 94 x0: array-like, shape = [n_features] 95 initial value provided to the optimizer 96 97 **kwargs: additional parameters to be passed to 98 loss function 99 100 Returns: 101 102 self: object 103 104 """ 105 106 if self.type_optim == "scd": 107 self.results = scd( 108 loss_func, 109 response=response, 110 x=x0, 111 num_iters=self.num_iters, 112 batch_prop=self.batch_prop, 113 learning_rate=self.learning_rate, 114 learning_method=self.learning_method, 115 mass=self.mass, 116 decay=self.decay, 117 randomization=self.randomization, 118 tolerance=self.tolerance, 119 verbose=self.verbose, 120 **kwargs 121 ) 122 123 if self.type_optim == "sgd": 124 self.results = sgd( 125 loss_func, 126 response=response, 127 x=x0, 128 num_iters=self.num_iters, 129 batch_prop=self.batch_prop, 130 learning_rate=self.learning_rate, 131 learning_method=self.learning_method, 132 mass=self.mass, 133 decay=self.decay, 134 randomization=self.randomization, 135 tolerance=self.tolerance, 136 verbose=self.verbose, 137 **kwargs 138 ) 139 140 return self
Fit GLM model to training data (X, y).
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
class QuantileRegressor(BaseEstimator, RegressorMixin):
    """
    Quantile Regressor.

    Parameters:

        obj: base model (regression model)
            The base regressor from which to build a
            quantile regressor.

        level: int, default=95
            The level of the quantiles to compute.

        scoring: str, default="predictions"
            The scoring to use for the optimization and constructing
            prediction intervals (predictions, residuals, conformal,
            studentized, conformal-studentized).

    Attributes:

        obj_ : base model (regression model)
            The base regressor from which to build a
            quantile regressor.

        offset_multipliers_ : list
            The multipliers for the offset.

        scoring_residuals_ : list
            The residuals for the scoring.

        student_multiplier_ : float
            The multiplier for the student.

    """

    def __init__(self, obj, level=95, scoring="predictions"):
        assert scoring in (
            "predictions",
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ), "scoring must be 'predictions' or 'residuals' or 'conformal' or 'studentized' or 'conformal-studentized'"
        self.obj = obj
        self.level = level
        # e.g. level=95 -> quantiles [0.025, 0.5, 0.975]
        low_risk_level = (1 - level / 100) / 2
        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
        self.scoring = scoring
        self.offset_multipliers_ = None
        self.obj_ = None
        self.scoring_residuals_ = None
        self.student_multiplier_ = None

    def _compute_quantile_loss(self, residuals, quantile):
        """
        Compute the pinball (quantile) loss for a given set of residuals and quantile.
        """
        return np.mean(
            residuals
            * (quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0))
        )

    def _optimize_multiplier(
        self,
        y,
        base_predictions,
        prev_predictions,
        scoring_residuals=None,
        quantile=0.5,
    ):
        """
        Optimize the offset multiplier for a given quantile.

        The multiplier is searched in log space (so it is always positive)
        by minimizing the pinball loss of the resulting predictions.
        """
        if not 0 < quantile < 1:
            raise ValueError("Quantile should be between 0 and 1.")

        def objective(log_multiplier):
            """
            Pinball loss of the candidate quantile predictions.
            """
            # Convert to positive multiplier using exp
            multiplier = np.exp(log_multiplier[0])
            if self.scoring == "predictions":
                assert (
                    base_predictions is not None
                ), "base_predictions must be not None"
                # Offset is proportional to |base predictions|
                if prev_predictions is None:
                    # For first quantile, subtract from conditional expectation
                    predictions = base_predictions - multiplier * np.abs(
                        base_predictions
                    )
                else:
                    # For other quantiles, add to previous quantile
                    offset = multiplier * np.abs(base_predictions)
                    predictions = prev_predictions + offset
            elif self.scoring in ("residuals", "conformal"):
                assert (
                    scoring_residuals is not None
                ), "scoring_residuals must be not None"
                # Offset is proportional to the standard error of the residuals
                if prev_predictions is None:
                    # For first quantile, subtract from conditional expectation
                    predictions = base_predictions - multiplier * np.std(
                        scoring_residuals
                    ) / np.sqrt(len(scoring_residuals))
                else:
                    # For other quantiles, add to previous quantile
                    offset = (
                        multiplier
                        * np.std(scoring_residuals)
                        / np.sqrt(len(scoring_residuals))
                    )
                    predictions = prev_predictions + offset
            elif self.scoring in ("studentized", "conformal-studentized"):
                assert (
                    scoring_residuals is not None
                ), "scoring_residuals must be not None"
                # Offset is proportional to the studentized multiplier
                if prev_predictions is None:
                    # For first quantile, subtract from conditional expectation
                    predictions = (
                        base_predictions - multiplier * self.student_multiplier_
                    )
                else:
                    # For other quantiles, add to previous quantile
                    offset = multiplier * self.student_multiplier_
                    predictions = prev_predictions + offset
            else:
                raise ValueError("Invalid argument 'scoring'")

            return self._compute_quantile_loss(y - predictions, quantile)

        # Optimize in log space for numerical stability
        bounds = [(-100, 100)]  # log space bounds
        result = differential_evolution(
            objective,
            bounds,
            popsize=25,
            maxiter=200,
            tol=1e-6,
            disp=False,
        )

        return np.exp(result.x[0])

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.
        """
        self.obj_ = clone(self.obj)

        if self.scoring in ("predictions", "residuals"):
            self.obj_.fit(X, y)
            base_predictions = self.obj_.predict(X)
            scoring_residuals = y - base_predictions
            self.scoring_residuals_ = scoring_residuals

        elif self.scoring == "conformal":
            X_train, X_calib, y_train, y_calib = train_test_split(
                X, y, test_size=0.5, random_state=42
            )
            self.obj_.fit(X_train, y_train)
            scoring_residuals = y_calib - self.obj_.predict(
                X_calib
            )  # These are calibration predictions
            self.scoring_residuals_ = scoring_residuals
            # Update base_predictions to use training predictions for optimization
            self.obj_.fit(X_calib, y_calib)
            base_predictions = self.obj_.predict(X_calib)

        elif self.scoring in ("studentized", "conformal-studentized"):
            # Calculate student multiplier
            if self.scoring == "conformal-studentized":
                X_train, X_calib, y_train, y_calib = train_test_split(
                    X, y, test_size=0.5, random_state=42
                )
                self.obj_.fit(X_train, y_train)
                scoring_residuals = y_calib - self.obj_.predict(X_calib)
                # Calculate studentized multiplier using calibration data
                self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
                    len(y_calib) - 1
                )
                self.obj_.fit(X_calib, y_calib)
                base_predictions = self.obj_.predict(X_calib)
            else:  # regular studentized
                self.obj_.fit(X, y)
                base_predictions = self.obj_.predict(X)
                scoring_residuals = y - base_predictions
                self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
                    len(y) - 1
                )

        # Initialize storage for multipliers
        self.offset_multipliers_ = []
        # Keep track of current predictions for each quantile
        current_predictions = None

        # Fit each quantile sequentially (lower, median, upper); each
        # quantile's predictions are an offset on top of the previous one,
        # which keeps the quantiles ordered.
        for i, quantile in enumerate(self.quantiles):
            if self.scoring == "predictions":
                multiplier = self._optimize_multiplier(
                    y=y,
                    base_predictions=base_predictions,
                    prev_predictions=current_predictions,
                    quantile=quantile,
                )

                self.offset_multipliers_.append(multiplier)

                # Update current predictions
                if current_predictions is None:
                    # First quantile (lowest)
                    current_predictions = (
                        base_predictions - multiplier * np.abs(base_predictions)
                    )
                else:
                    # Subsequent quantiles
                    offset = multiplier * np.abs(base_predictions)
                    current_predictions = current_predictions + offset

            elif self.scoring == "residuals":
                multiplier = self._optimize_multiplier(
                    y=y,
                    base_predictions=base_predictions,
                    scoring_residuals=scoring_residuals,
                    prev_predictions=current_predictions,
                    quantile=quantile,
                )

                self.offset_multipliers_.append(multiplier)

                # Update current predictions
                if current_predictions is None:
                    # First quantile (lowest)
                    current_predictions = (
                        base_predictions
                        - multiplier
                        * np.std(scoring_residuals)
                        / np.sqrt(len(scoring_residuals))
                    )
                else:
                    # Subsequent quantiles
                    offset = (
                        multiplier
                        * np.std(scoring_residuals)
                        / np.sqrt(len(scoring_residuals))
                    )
                    current_predictions = current_predictions + offset

            elif self.scoring == "conformal":
                multiplier = self._optimize_multiplier(
                    y=y_calib,
                    base_predictions=base_predictions,
                    scoring_residuals=scoring_residuals,
                    prev_predictions=current_predictions,
                    quantile=quantile,
                )

                self.offset_multipliers_.append(multiplier)

                # Update current predictions
                if current_predictions is None:
                    # First quantile (lowest)
                    current_predictions = (
                        base_predictions
                        - multiplier
                        * np.std(scoring_residuals)
                        / np.sqrt(len(scoring_residuals))
                    )
                else:
                    # Subsequent quantiles
                    offset = (
                        multiplier
                        * np.std(scoring_residuals)
                        / np.sqrt(len(scoring_residuals))
                    )
                    current_predictions = current_predictions + offset

            elif self.scoring in ("studentized", "conformal-studentized"):
                multiplier = self._optimize_multiplier(
                    y=y_calib if self.scoring == "conformal-studentized" else y,
                    base_predictions=base_predictions,
                    scoring_residuals=scoring_residuals,
                    prev_predictions=current_predictions,
                    quantile=quantile,
                )

                self.offset_multipliers_.append(multiplier)

                # Update current predictions
                if current_predictions is None:
                    current_predictions = (
                        base_predictions - multiplier * self.student_multiplier_
                    )
                else:
                    offset = multiplier * self.student_multiplier_
                    current_predictions = current_predictions + offset

        return self

    def predict(self, X, return_pi=False):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.

            return_pi: bool, default=False
                Whether to return the prediction intervals. If True, a
                namedtuple with fields `mean`, `lower`, `upper`, `median`
                is returned; otherwise only the median predictions.
        """
        if self.obj_ is None or self.offset_multipliers_ is None:
            raise ValueError("Model not fitted yet.")

        base_predictions = self.obj_.predict(X)
        all_predictions = []

        if self.scoring == "predictions":
            # Generate first quantile
            current_predictions = base_predictions - self.offset_multipliers_[
                0
            ] * np.abs(base_predictions)
            all_predictions.append(current_predictions)

            # Generate remaining quantiles
            for multiplier in self.offset_multipliers_[1:]:
                offset = multiplier * np.abs(base_predictions)
                current_predictions = current_predictions + offset
                all_predictions.append(current_predictions)

        elif self.scoring in ("residuals", "conformal"):
            # Generate first quantile
            current_predictions = base_predictions - self.offset_multipliers_[
                0
            ] * np.std(self.scoring_residuals_) / np.sqrt(
                len(self.scoring_residuals_)
            )
            all_predictions.append(current_predictions)

            # Generate remaining quantiles
            for multiplier in self.offset_multipliers_[1:]:
                offset = (
                    multiplier
                    * np.std(self.scoring_residuals_)
                    / np.sqrt(len(self.scoring_residuals_))
                )
                current_predictions = current_predictions + offset
                all_predictions.append(current_predictions)

        elif self.scoring in ("studentized", "conformal-studentized"):
            # Generate first quantile
            current_predictions = (
                base_predictions
                - self.offset_multipliers_[0] * self.student_multiplier_
            )
            all_predictions.append(current_predictions)

            # Generate remaining quantiles
            for multiplier in self.offset_multipliers_[1:]:
                offset = multiplier * self.student_multiplier_
                current_predictions = current_predictions + offset
                all_predictions.append(current_predictions)

        if not return_pi:
            # median predictions only
            return np.asarray(all_predictions[1])

        # Fix: the previous implementation misspelled the namedtuple name
        # ("DecribeResult") and assigned attributes on the namedtuple *class*
        # instead of creating an instance; return a proper instance so
        # attribute access still works and tuple behavior is correct.
        DescribeResult = namedtuple(
            "DescribeResult", ["mean", "lower", "upper", "median"]
        )
        return DescribeResult(
            mean=base_predictions,
            lower=np.asarray(all_predictions[0]),
            upper=np.asarray(all_predictions[2]),
            median=np.asarray(all_predictions[1]),
        )
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
def fit(self, X, y):
    """Fit the model to the data.

    Fits the base regressor, derives scoring residuals according to
    `self.scoring`, then sequentially optimizes one offset multiplier per
    quantile in `self.quantiles` (each quantile is stacked on the previous
    one, which keeps the quantiles ordered).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self: object
    """
    self.obj_ = clone(self.obj)

    if self.scoring in ("predictions", "residuals"):
        # In-sample fit: residuals are computed on the training data itself
        self.obj_.fit(X, y)
        base_predictions = self.obj_.predict(X)
        scoring_residuals = y - base_predictions
        self.scoring_residuals_ = scoring_residuals

    elif self.scoring == "conformal":
        # Split-conformal style: residuals come from a held-out calibration half
        X_train, X_calib, y_train, y_calib = train_test_split(
            X, y, test_size=0.5, random_state=42
        )
        self.obj_.fit(X_train, y_train)
        scoring_residuals = y_calib - self.obj_.predict(
            X_calib
        )  # These are calibration predictions
        self.scoring_residuals_ = scoring_residuals
        # Update base_predictions to use training predictions for optimization
        # NOTE(review): the model is refit on the calibration half here, so
        # base_predictions are in-sample on X_calib — confirm this is intended.
        self.obj_.fit(X_calib, y_calib)
        base_predictions = self.obj_.predict(X_calib)

    elif self.scoring in ("studentized", "conformal-studentized"):
        # Calculate student multiplier
        if self.scoring == "conformal-studentized":
            X_train, X_calib, y_train, y_calib = train_test_split(
                X, y, test_size=0.5, random_state=42
            )
            self.obj_.fit(X_train, y_train)
            scoring_residuals = y_calib - self.obj_.predict(X_calib)
            # Calculate studentized multiplier using calibration data
            self.student_multiplier_ = np.std(y_calib, ddof=1) / np.sqrt(
                len(y_calib) - 1
            )
            self.obj_.fit(X_calib, y_calib)
            base_predictions = self.obj_.predict(X_calib)
        else:  # regular studentized
            self.obj_.fit(X, y)
            base_predictions = self.obj_.predict(X)
            scoring_residuals = y - base_predictions
            self.student_multiplier_ = np.std(y, ddof=1) / np.sqrt(
                len(y) - 1
            )

    # Initialize storage for multipliers
    self.offset_multipliers_ = []
    # Keep track of current predictions for each quantile
    current_predictions = None

    # Fit each quantile sequentially (lowest first; each subsequent quantile
    # is an upward offset from the previous one)
    for i, quantile in enumerate(self.quantiles):
        if self.scoring == "predictions":
            multiplier = self._optimize_multiplier(
                y=y,
                base_predictions=base_predictions,
                prev_predictions=current_predictions,
                quantile=quantile,
            )

            self.offset_multipliers_.append(multiplier)

            # Update current predictions
            if current_predictions is None:
                # First quantile (lowest)
                current_predictions = (
                    base_predictions - multiplier * np.abs(base_predictions)
                )
            else:
                # Subsequent quantiles
                offset = multiplier * np.abs(base_predictions)
                current_predictions = current_predictions + offset

        elif self.scoring == "residuals":
            multiplier = self._optimize_multiplier(
                y=y,
                base_predictions=base_predictions,
                scoring_residuals=scoring_residuals,
                prev_predictions=current_predictions,
                quantile=quantile,
            )

            self.offset_multipliers_.append(multiplier)

            # Update current predictions
            if current_predictions is None:
                # First quantile (lowest): offset scaled by the standard
                # error of the residuals
                current_predictions = (
                    base_predictions
                    - multiplier
                    * np.std(scoring_residuals)
                    / np.sqrt(len(scoring_residuals))
                )
            else:
                # Subsequent quantiles
                offset = (
                    multiplier
                    * np.std(scoring_residuals)
                    / np.sqrt(len(scoring_residuals))
                )
                current_predictions = current_predictions + offset

        elif self.scoring == "conformal":
            # Optimized against the calibration targets
            multiplier = self._optimize_multiplier(
                y=y_calib,
                base_predictions=base_predictions,
                scoring_residuals=scoring_residuals,
                prev_predictions=current_predictions,
                quantile=quantile,
            )

            self.offset_multipliers_.append(multiplier)

            # Update current predictions
            if current_predictions is None:
                # First quantile (lowest)
                current_predictions = (
                    base_predictions
                    - multiplier
                    * np.std(scoring_residuals)
                    / np.sqrt(len(scoring_residuals))
                )
            else:
                # Subsequent quantiles
                offset = (
                    multiplier
                    * np.std(scoring_residuals)
                    / np.sqrt(len(scoring_residuals))
                )
                current_predictions = current_predictions + offset

        elif self.scoring in ("studentized", "conformal-studentized"):
            multiplier = self._optimize_multiplier(
                y=y_calib if self.scoring == "conformal-studentized" else y,
                base_predictions=base_predictions,
                scoring_residuals=scoring_residuals,
                prev_predictions=current_predictions,
                quantile=quantile,
            )

            self.offset_multipliers_.append(multiplier)

            # Update current predictions
            if current_predictions is None:
                current_predictions = (
                    base_predictions - multiplier * self.student_multiplier_
                )
            else:
                offset = multiplier * self.student_multiplier_
                current_predictions = current_predictions + offset

    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict(self, X, return_pi=False):
    """Predict the target variable.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to predict for, where n_samples is the number of
            samples and n_features is the number of features.

        return_pi: bool, default=False
            Whether to return the prediction intervals (mean, lower,
            upper, median) instead of the median prediction only.

    Returns:

        Median quantile predictions (ndarray) when `return_pi` is False;
        otherwise a namedtuple with fields (mean, lower, upper, median).

    Raises:

        ValueError: if the model has not been fitted yet.
    """
    if self.obj_ is None or self.offset_multipliers_ is None:
        raise ValueError("Model not fitted yet.")

    base_predictions = self.obj_.predict(X)
    all_predictions = []

    if self.scoring == "predictions":
        # Offsets are proportional to |prediction|; the first multiplier
        # shifts down to the lowest quantile, the rest stack upward.
        current_predictions = base_predictions - self.offset_multipliers_[
            0
        ] * np.abs(base_predictions)
        all_predictions.append(current_predictions)

        for multiplier in self.offset_multipliers_[1:]:
            current_predictions = current_predictions + multiplier * np.abs(
                base_predictions
            )
            all_predictions.append(current_predictions)

    elif self.scoring in ("residuals", "conformal"):
        # Offsets are scaled by the standard error of the scoring residuals;
        # the scale is loop-invariant, so compute it once.
        scale = np.std(self.scoring_residuals_) / np.sqrt(
            len(self.scoring_residuals_)
        )
        current_predictions = (
            base_predictions - self.offset_multipliers_[0] * scale
        )
        all_predictions.append(current_predictions)

        for multiplier in self.offset_multipliers_[1:]:
            current_predictions = current_predictions + multiplier * scale
            all_predictions.append(current_predictions)

    elif self.scoring in ("studentized", "conformal-studentized"):
        # Offsets are scaled by the student multiplier computed in fit()
        current_predictions = (
            base_predictions
            - self.offset_multipliers_[0] * self.student_multiplier_
        )
        all_predictions.append(current_predictions)

        for multiplier in self.offset_multipliers_[1:]:
            current_predictions = (
                current_predictions + multiplier * self.student_multiplier_
            )
            all_predictions.append(current_predictions)

    if not return_pi:
        # (lower, median, upper) were generated in order; index 1 is the median
        return np.asarray(all_predictions[1])

    # Bug fix: the original set attributes on the namedtuple *class* and
    # returned the class (with the misspelled name "DecribeResult");
    # return a proper instance instead. Attribute access is unchanged.
    DescribeResult = namedtuple(
        "DescribeResult", ["mean", "lower", "upper", "median"]
    )
    return DescribeResult(
        mean=base_predictions,
        lower=np.asarray(all_predictions[0]),
        upper=np.asarray(all_predictions[2]),
        median=np.asarray(all_predictions[1]),
    )
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
43class QuantileClassifier(BaseEstimator, ClassifierMixin): 44 """ 45 Quantile Classifier. 46 47 Parameters: 48 49 obj: base model (classification model) 50 The base classifier from which to build a 51 quantile classifier. 52 53 level: int, default=95 54 The level of the quantiles to compute. 55 56 scoring: str, default="predictions" 57 The scoring to use for the optimization and constructing 58 prediction intervals (predictions, residuals, conformal, 59 studentized, conformal-studentized). 60 61 Attributes: 62 63 obj_ : base model (classification model) 64 The base classifier from which to build a 65 quantile classifier. 66 67 offset_multipliers_ : list 68 The multipliers for the offset. 69 70 scoring_residuals_ : list 71 The residuals for the scoring. 72 73 student_multiplier_ : float 74 The multiplier for the student. 75 76 77 """ 78 79 def __init__(self, obj, level=95, scoring="predictions"): 80 assert scoring in ( 81 "predictions", 82 "residuals", 83 "conformal", 84 "studentized", 85 "conformal-studentized", 86 ), "scoring must be 'predictions' or 'residuals' or 'conformal' or 'studentized' or 'conformal-studentized'" 87 self.obj = obj 88 self.level = level 89 self.scoring = scoring 90 quantileregressor = QuantileRegressor( 91 self.obj, self.level, self.scoring 92 ) 93 quantileregressor.predict = partial( 94 quantileregressor.predict, return_pi=False 95 ) 96 self.obj_ = SimpleMultitaskClassifier(quantileregressor) 97 98 def fit(self, X, y, **kwargs): 99 self.obj_.fit(X, y, **kwargs) 100 101 def predict(self, X, **kwargs): 102 return self.obj_.predict(X, **kwargs) 103 104 def predict_proba(self, X, **kwargs): 105 return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (classification model)
The base classifier from which to build a
quantile classifier.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (classification model)
The base classifier from which to build a
quantile classifier.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
class RandomBagRegressor(RandomBag, RegressorMixin):
    """Randomized 'Bagging' Regression model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners

    Examples:

    ```python
    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import fetch_california_housing
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.model_selection import train_test_split

    X, y = fetch_california_housing(return_X_y=True, as_frame=False)

    # split data into training test and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2, random_state=13)

    # Requires further tuning
    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                                 n_estimators=50,
                                 col_sample=0.9, row_sample=0.9,
                                 dropout=0, n_clusters=0, verbose=1)

    obj2.fit(X_train, y_train)

    print(np.sqrt(obj2.score(X_test, y_test)))  # RMSE

    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.verbose = verbose
        self.n_jobs = n_jobs
        # dictionary of fitted base learners, keyed by estimator index
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Fit Random 'Bagging' model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        # One template learner; clones/copies of it are trained on
        # randomized subsamples of rows and columns.
        base_learner = CustomRegressor(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        # 1 - Sequential training -----

        if self.n_jobs is None:
            self.voter_ = rbagloop_regression(
                base_learner, X, y, self.n_estimators, self.verbose, self.seed
            )

            self.n_estimators = len(self.voter_)

            return self

        # 2 - Parallel training -----
        # buggy
        # if self.n_jobs is not None:
        # NOTE(review): only reached when n_jobs is not None; the author
        # flagged this path as buggy — verify before relying on it.
        def fit_estimators(m):
            # each copy gets its own seed for reproducible diversity
            base_learner__ = deepcopy(base_learner)
            base_learner__.set_params(seed=self.seed + m * 1000)
            base_learner__.fit(X, y, **kwargs)
            return base_learner__

        if self.verbose == 1:
            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(fit_estimators)(m)
                for m in tqdm(range(self.n_estimators))
            )
        else:
            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(fit_estimators)(m) for m in range(self.n_estimators)
            )

        self.voter_ = {i: elt for i, elt in enumerate(voters_list)}

        self.n_estimators = len(self.voter_)

        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            weights: array-like, optional
                per-estimator weights; when given, predictions are the
                weighted sum instead of the plain average.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            estimates for test data: {array-like}

        """

        def calculate_preds(voter, weights=None):
            # average (or weighted-sum) the base learners' predictions
            ensemble_preds = 0

            n_iter = len(voter)

            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"

            if weights is None:
                for idx, elt in voter.items():
                    ensemble_preds += elt.predict(X)

                return ensemble_preds / n_iter

            # if weights is not None:
            for idx, elt in voter.items():
                ensemble_preds += weights[idx] * elt.predict(X)

            return ensemble_preds

        # end calculate_preds ----

        if weights is None:
            return calculate_preds(self.voter_)

        # if weights is not None:
        self.weights = weights

        return calculate_preds(self.voter_, weights)
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training test and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
169 def fit(self, X, y, **kwargs): 170 """Fit Random 'Bagging' model to training data (X, y). 171 172 Args: 173 174 X: {array-like}, shape = [n_samples, n_features] 175 Training vectors, where n_samples is the number 176 of samples and n_features is the number of features. 177 178 y: array-like, shape = [n_samples] 179 Target values. 180 181 **kwargs: additional parameters to be passed to 182 self.cook_training_set or self.obj.fit 183 184 Returns: 185 186 self: object 187 188 """ 189 190 base_learner = CustomRegressor( 191 self.obj, 192 n_hidden_features=self.n_hidden_features, 193 activation_name=self.activation_name, 194 a=self.a, 195 nodes_sim=self.nodes_sim, 196 bias=self.bias, 197 dropout=self.dropout, 198 direct_link=self.direct_link, 199 n_clusters=self.n_clusters, 200 type_clust=self.type_clust, 201 type_scaling=self.type_scaling, 202 col_sample=self.col_sample, 203 row_sample=self.row_sample, 204 seed=self.seed, 205 ) 206 207 # 1 - Sequential training ----- 208 209 if self.n_jobs is None: 210 self.voter_ = rbagloop_regression( 211 base_learner, X, y, self.n_estimators, self.verbose, self.seed 212 ) 213 214 self.n_estimators = len(self.voter_) 215 216 return self 217 218 # 2 - Parallel training ----- 219 # buggy 220 # if self.n_jobs is not None: 221 def fit_estimators(m): 222 base_learner__ = deepcopy(base_learner) 223 base_learner__.set_params(seed=self.seed + m * 1000) 224 base_learner__.fit(X, y, **kwargs) 225 return base_learner__ 226 227 if self.verbose == 1: 228 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 229 delayed(fit_estimators)(m) 230 for m in tqdm(range(self.n_estimators)) 231 ) 232 else: 233 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 234 delayed(fit_estimators)(m) for m in range(self.n_estimators) 235 ) 236 237 self.voter_ = {i: elt for i, elt in enumerate(voters_list)} 238 239 self.n_estimators = len(self.voter_) 240 241 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
243 def predict(self, X, weights=None, **kwargs): 244 """Predict for test data X. 245 246 Args: 247 248 X: {array-like}, shape = [n_samples, n_features] 249 Training vectors, where n_samples is the number 250 of samples and n_features is the number of features. 251 252 **kwargs: additional parameters to be passed to 253 self.cook_test_set 254 255 Returns: 256 257 estimates for test data: {array-like} 258 259 """ 260 261 def calculate_preds(voter, weights=None): 262 ensemble_preds = 0 263 264 n_iter = len(voter) 265 266 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 267 268 if weights is None: 269 for idx, elt in voter.items(): 270 ensemble_preds += elt.predict(X) 271 272 return ensemble_preds / n_iter 273 274 # if weights is not None: 275 for idx, elt in voter.items(): 276 ensemble_preds += weights[idx] * elt.predict(X) 277 278 return ensemble_preds 279 280 # end calculate_preds ---- 281 282 if weights is None: 283 return calculate_preds(self.voter_) 284 285 # if weights is not None: 286 self.weights = weights 287 288 return calculate_preds(self.voter_, weights)
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
class RandomBagClassifier(RandomBag, ClassifierMixin):
    """Randomized 'Bagging' Classification model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)

    ```python
    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time


    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # decision tree
    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                     direct_link=True,
                                     n_estimators=100,
                                     col_sample=0.9, row_sample=0.9,
                                     dropout=0.3, n_clusters=0, verbose=1)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    # NOTE(review): this class attribute is shadowed by the read-only
    # `_estimator_type` property defined at the bottom of the class —
    # one of the two definitions is redundant.
    _estimator_type = "classifier"

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.verbose = verbose
        self.n_jobs = n_jobs
        # dictionary of fitted base learners, keyed by estimator index
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Fit Random 'Bagging' model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        self.n_classes_ = len(np.unique(y))  # for compatibility with sklearn

        # training
        self.n_classes = len(np.unique(y))

        # Template learner; copies of it are trained on randomized
        # row/column subsamples of (X, y).
        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            cv_calibration=None,
        )

        # 1 - Sequential training -----

        if self.n_jobs is None:
            self.voter_ = rbagloop_classification(
                base_learner, X, y, self.n_estimators, self.verbose, self.seed
            )

            self.n_estimators = len(self.voter_)

            return self

        # 2 - Parallel training -----
        # buggy
        # if self.n_jobs is not None:
        # NOTE(review): only reached when n_jobs is not None; flagged as
        # buggy by the author — verify before relying on it. This path
        # also skips setting `classes_` only on the sequential return above.
        def fit_estimators(m):
            base_learner__ = deepcopy(base_learner)
            base_learner__.set_params(seed=self.seed + m * 1000)
            base_learner__.fit(X, y, **kwargs)
            return base_learner__

        if self.verbose == 1:
            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(fit_estimators)(m)
                for m in tqdm(range(self.n_estimators))
            )
        else:
            voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(fit_estimators)(m) for m in range(self.n_estimators)
            )

        self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)}

        self.n_estimators = len(self.voter_)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}

        """
        # class label = argmax over the (possibly weighted) ensemble probabilities
        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

    def predict_proba(self, X, weights=None, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        def calculate_probas(voter, weights=None, verbose=None):
            # average (or weighted-sum) the base learners' class probabilities
            ensemble_proba = 0

            n_iter = len(voter)

            assert n_iter > 0, "no estimator found in `RandomBag` ensemble"

            if weights is None:
                for idx, elt in voter.items():
                    # NOTE(review): bare except silently skips any learner
                    # whose predict_proba fails, but n_iter still counts it
                    # in the average denominator — confirm this is intended.
                    try:
                        ensemble_proba += elt.predict_proba(X)

                        # if verbose == 1:
                        #    pbar.update(idx)

                    except:
                        continue

                # if verbose == 1:
                #    pbar.update(n_iter)

                return ensemble_proba / n_iter

            # if weights is not None:
            for idx, elt in voter.items():
                ensemble_proba += weights[idx] * elt.predict_proba(X)

                # if verbose == 1:
                #    pbar.update(idx)

            # if verbose == 1:
            #    pbar.update(n_iter)

            return ensemble_proba

        # end calculate_probas ----

        if self.n_jobs is None:
            # if self.verbose == 1:
            #    pbar = Progbar(self.n_estimators)

            if weights is None:
                return calculate_probas(self.voter_, verbose=self.verbose)

            # if weights is not None:
            self.weights = weights

            return calculate_probas(self.voter_, weights, verbose=self.verbose)

        # if self.n_jobs is not None:
        def predict_estimator(m):
            # NOTE(review): bare except returns None for a failing learner,
            # which would break the summation below — confirm.
            try:
                return self.voter_[m].predict_proba(X)
            except:
                pass

        if self.verbose == 1:
            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(predict_estimator)(m)
                for m in tqdm(range(self.n_estimators))
            )

        else:
            preds = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(predict_estimator)(m) for m in range(self.n_estimators)
            )

        ensemble_proba = 0

        if weights is None:
            for i in range(self.n_estimators):
                ensemble_proba += preds[i]

            return ensemble_proba / self.n_estimators

        for i in range(self.n_estimators):
            ensemble_proba += weights[i] * preds[i]

        return ensemble_proba

    @property
    def _estimator_type(self):
        # sklearn uses this marker to treat the estimator as a classifier
        return "classifier"
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
import numpy as np
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
182 def fit(self, X, y, **kwargs): 183 """Fit Random 'Bagging' model to training data (X, y). 184 185 Args: 186 187 X: {array-like}, shape = [n_samples, n_features] 188 Training vectors, where n_samples is the number 189 of samples and n_features is the number of features. 190 191 y: array-like, shape = [n_samples] 192 Target values. 193 194 **kwargs: additional parameters to be passed to 195 self.cook_training_set or self.obj.fit 196 197 Returns: 198 199 self: object 200 201 """ 202 203 assert mx.is_factor(y), "y must contain only integers" 204 205 self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn 206 207 # training 208 self.n_classes = len(np.unique(y)) 209 210 base_learner = CustomClassifier( 211 self.obj, 212 n_hidden_features=self.n_hidden_features, 213 activation_name=self.activation_name, 214 a=self.a, 215 nodes_sim=self.nodes_sim, 216 bias=self.bias, 217 dropout=self.dropout, 218 direct_link=self.direct_link, 219 n_clusters=self.n_clusters, 220 type_clust=self.type_clust, 221 type_scaling=self.type_scaling, 222 col_sample=self.col_sample, 223 row_sample=self.row_sample, 224 seed=self.seed, 225 cv_calibration=None, 226 ) 227 228 # 1 - Sequential training ----- 229 230 if self.n_jobs is None: 231 self.voter_ = rbagloop_classification( 232 base_learner, X, y, self.n_estimators, self.verbose, self.seed 233 ) 234 235 self.n_estimators = len(self.voter_) 236 237 return self 238 239 # 2 - Parallel training ----- 240 # buggy 241 # if self.n_jobs is not None: 242 def fit_estimators(m): 243 base_learner__ = deepcopy(base_learner) 244 base_learner__.set_params(seed=self.seed + m * 1000) 245 base_learner__.fit(X, y, **kwargs) 246 return base_learner__ 247 248 if self.verbose == 1: 249 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 250 delayed(fit_estimators)(m) 251 for m in tqdm(range(self.n_estimators)) 252 ) 253 else: 254 voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")( 255 delayed(fit_estimators)(m) for m in 
range(self.n_estimators) 256 ) 257 258 self.voter_ = {idx: elt for idx, elt in enumerate(voters_list)} 259 260 self.n_estimators = len(self.voter_) 261 self.classes_ = np.unique(y) 262 return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
264 def predict(self, X, weights=None, **kwargs): 265 """Predict test data X. 266 267 Args: 268 269 X: {array-like}, shape = [n_samples, n_features] 270 Training vectors, where n_samples is the number 271 of samples and n_features is the number of features. 272 273 **kwargs: additional parameters to be passed to 274 self.cook_test_set 275 276 Returns: 277 278 model predictions: {array-like} 279 280 """ 281 return self.predict_proba(X, weights, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
283 def predict_proba(self, X, weights=None, **kwargs): 284 """Predict probabilities for test data X. 285 286 Args: 287 288 X: {array-like}, shape = [n_samples, n_features] 289 Training vectors, where n_samples is the number 290 of samples and n_features is the number of features. 291 292 **kwargs: additional parameters to be passed to 293 self.cook_test_set 294 295 Returns: 296 297 probability estimates for test data: {array-like} 298 299 """ 300 301 def calculate_probas(voter, weights=None, verbose=None): 302 ensemble_proba = 0 303 304 n_iter = len(voter) 305 306 assert n_iter > 0, "no estimator found in `RandomBag` ensemble" 307 308 if weights is None: 309 for idx, elt in voter.items(): 310 try: 311 ensemble_proba += elt.predict_proba(X) 312 313 # if verbose == 1: 314 # pbar.update(idx) 315 316 except: 317 continue 318 319 # if verbose == 1: 320 # pbar.update(n_iter) 321 322 return ensemble_proba / n_iter 323 324 # if weights is not None: 325 for idx, elt in voter.items(): 326 ensemble_proba += weights[idx] * elt.predict_proba(X) 327 328 # if verbose == 1: 329 # pbar.update(idx) 330 331 # if verbose == 1: 332 # pbar.update(n_iter) 333 334 return ensemble_proba 335 336 # end calculate_probas ---- 337 338 if self.n_jobs is None: 339 # if self.verbose == 1: 340 # pbar = Progbar(self.n_estimators) 341 342 if weights is None: 343 return calculate_probas(self.voter_, verbose=self.verbose) 344 345 # if weights is not None: 346 self.weights = weights 347 348 return calculate_probas(self.voter_, weights, verbose=self.verbose) 349 350 # if self.n_jobs is not None: 351 def predict_estimator(m): 352 try: 353 return self.voter_[m].predict_proba(X) 354 except: 355 pass 356 357 if self.verbose == 1: 358 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 359 delayed(predict_estimator)(m) 360 for m in tqdm(range(self.n_estimators)) 361 ) 362 363 else: 364 preds = Parallel(n_jobs=self.n_jobs, prefer="threads")( 365 delayed(predict_estimator)(m) for m in 
range(self.n_estimators) 366 ) 367 368 ensemble_proba = 0 369 370 if weights is None: 371 for i in range(self.n_estimators): 372 ensemble_proba += preds[i] 373 374 return ensemble_proba / self.n_estimators 375 376 for i in range(self.n_estimators): 377 ensemble_proba += weights[i] * preds[i] 378 379 return ensemble_proba
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class RandomFourierEstimator(BaseEstimator):
    """Meta-estimator that maps inputs through a Random Fourier Features
    (RFF) transformation — an RBF-kernel approximation via ``RBFSampler`` —
    before delegating fitting and prediction to a wrapped scikit-learn
    estimator.
    """

    def __init__(
        self, estimator, n_components=100, gamma=1.0, random_state=None
    ):
        """
        Random Fourier Features transformation with a given estimator.

        Parameters:
        - estimator: A scikit-learn estimator (classifier, regressor, etc.).
        - n_components: Number of random Fourier features.
        - gamma: Hyperparameter for RBF kernel approximation.
        - random_state: Random state for reproducibility.
        """
        self.estimator = estimator
        self.n_components = n_components
        self.gamma = gamma
        self.random_state = random_state

        # Dynamically set the estimator type and appropriate mixin so the
        # wrapper advertises the same kind (classifier/regressor) as the
        # wrapped estimator to sklearn utilities.
        estimator_type = _get_estimator_type(estimator)
        if estimator_type == "classifier":
            self._estimator_type = "classifier"
            # Add ClassifierMixin to the class hierarchy
            if not isinstance(self, ClassifierMixin):
                # NOTE(review): rebinding self.__class__ to an ad-hoc subclass
                # grants the mixin at runtime but can break pickling — confirm
                self.__class__ = type(
                    self.__class__.__name__,
                    (self.__class__, ClassifierMixin),
                    dict(self.__class__.__dict__),
                )
        elif estimator_type == "regressor":
            self._estimator_type = "regressor"
            # Add RegressorMixin to the class hierarchy
            if not isinstance(self, RegressorMixin):
                self.__class__ = type(
                    self.__class__.__name__,
                    (self.__class__, RegressorMixin),
                    dict(self.__class__.__dict__),
                )

    def fit(self, X, y=None):
        """
        Fit the Random Fourier feature transformer and the estimator.
        """
        X = check_array(X)

        # Initialize and fit the Random Fourier Feature transformer
        self.rff_ = RBFSampler(
            n_components=self.n_components,
            gamma=self.gamma,
            random_state=self.random_state,
        )
        X_transformed = self.rff_.fit_transform(X)

        # Fit the underlying estimator on the transformed data
        self.estimator.fit(X_transformed, y)

        return self

    def partial_fit(self, X, y, classes=None):
        """
        Incrementally fit the Random Fourier feature transformer and the estimator.
        """
        X = check_array(X)

        # First call fits the transformer; subsequent calls only transform,
        # so every batch shares the same random projection.
        if not hasattr(self, "rff_"):
            self.rff_ = RBFSampler(
                n_components=self.n_components,
                gamma=self.gamma,
                random_state=self.random_state,
            )
            X_transformed = self.rff_.fit_transform(X)
        else:
            X_transformed = self.rff_.transform(X)

        # If estimator supports partial_fit, we use it, otherwise raise an error
        if hasattr(self.estimator, "partial_fit"):
            self.estimator.partial_fit(X_transformed, y, classes=classes)
        else:
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support partial_fit method."
            )

        return self

    def predict(self, X):
        """
        Predict using the Random Fourier transformed data.
        """
        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Predict using the underlying estimator
        return self.estimator.predict(X_transformed)

    def predict_proba(self, X):
        """
        Predict class probabilities (only for classifiers).
        """
        if (
            not hasattr(self, "_estimator_type")
            or self._estimator_type != "classifier"
        ):
            raise AttributeError(
                "predict_proba is not available for this estimator type."
            )

        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        if not hasattr(self.estimator, "predict_proba"):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support predict_proba."
            )

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Predict probabilities using the underlying estimator
        return self.estimator.predict_proba(X_transformed)

    def predict_log_proba(self, X):
        """
        Predict class log probabilities (only for classifiers).
        """
        if (
            not hasattr(self, "_estimator_type")
            or self._estimator_type != "classifier"
        ):
            raise AttributeError(
                "predict_log_proba is not available for this estimator type."
            )

        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        if not hasattr(self.estimator, "predict_log_proba"):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support predict_log_proba."
            )

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        return self.estimator.predict_log_proba(X_transformed)

    def decision_function(self, X):
        """
        Decision function (only for classifiers).
        """
        if (
            not hasattr(self, "_estimator_type")
            or self._estimator_type != "classifier"
        ):
            raise AttributeError(
                "decision_function is not available for this estimator type."
            )

        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        if not hasattr(self.estimator, "decision_function"):
            raise ValueError(
                f"The estimator {type(self.estimator).__name__} does not support decision_function."
            )

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        return self.estimator.decision_function(X_transformed)

    def score(self, X, y):
        """
        Evaluate the model performance.
        """
        check_is_fitted(self, ["rff_"])
        X = check_array(X)

        # Transform the input data
        X_transformed = self.rff_.transform(X)

        # Evaluate using the underlying estimator's score method
        return self.estimator.score(X_transformed, y)

    @property
    def classes_(self):
        """Classes labels (only for classifiers)."""
        if (
            hasattr(self, "_estimator_type")
            and self._estimator_type == "classifier"
        ):
            # Delegate to the wrapped estimator; None when not yet fitted.
            return getattr(self.estimator, "classes_", None)
        else:
            raise AttributeError(
                "classes_ is not available for this estimator type."
            )

    def get_params(self, deep=True):
        """
        Get parameters for this estimator.
        """
        params = {}

        # Expose nested estimator parameters under the sklearn
        # 'estimator__' prefix convention.
        if deep:
            estimator_params = self.estimator.get_params(deep=True)
            for key, value in estimator_params.items():
                params[f"estimator__{key}"] = value

        # Add our own parameters
        params.update(
            {
                "estimator": self.estimator,
                "n_components": self.n_components,
                "gamma": self.gamma,
                "random_state": self.random_state,
            }
        )

        return params

    def set_params(self, **params):
        """
        Set the parameters of this estimator.
        """
        # Separate our parameters from estimator parameters
        our_params = {}
        estimator_params = {}

        for param, value in params.items():
            if param.startswith("estimator__"):
                # Remove the 'estimator__' prefix (11 characters)
                estimator_params[param[11:]] = value
            elif param in [
                "estimator",
                "n_components",
                "gamma",
                "random_state",
            ]:
                our_params[param] = value
            else:
                # Assume it's an estimator parameter without prefix
                estimator_params[param] = value

        # Set our parameters
        for param, value in our_params.items():
            setattr(self, param, value)

        # If the wrapped estimator changed, re-run __init__ so the
        # estimator-type/mixin bookkeeping is refreshed.
        if "estimator" in our_params:
            self.__init__(
                self.estimator, self.n_components, self.gamma, self.random_state
            )

        # Set estimator parameters
        if estimator_params:
            self.estimator.set_params(**estimator_params)

        # If RFF hyperparameters changed after fitting, drop the fitted
        # transformer so it is recreated on the next fit.
        if hasattr(self, "rff_") and (
            "n_components" in our_params
            or "gamma" in our_params
            or "random_state" in our_params
        ):
            delattr(self, "rff_")

        return self
Base class for all estimators in scikit-learn.
Inheriting from this class provides default implementations of:
- setting and getting parameters used by
``GridSearchCV`` and friends;
- textual and HTML representation displayed in terminals and IDEs;
- estimator serialization;
- parameters validation;
- data validation;
- feature names validation.
Read more in the :ref:`User Guide <rolling_your_own_estimator>`.
Notes
All estimators should specify all the parameters that can be set
at the class level in their __init__ as explicit keyword
arguments (no *args or **kwargs).
Examples
>>> import numpy as np
>>> from sklearn.base import BaseEstimator
>>> class MyEstimator(BaseEstimator):
... def __init__(self, *, param=1):
... self.param = param
... def fit(self, X, y=None):
... self.is_fitted_ = True
... return self
... def predict(self, X):
... return np.full(shape=X.shape[0], fill_value=self.param)
>>> estimator = MyEstimator(param=2)
>>> estimator.get_params()
{'param': 2}
>>> X = np.array([[1, 2], [2, 3], [3, 4]])
>>> y = np.array([1, 0, 1])
>>> estimator.fit(X, y).predict(X)
array([2, 2, 2])
>>> estimator.set_params(param=3).fit(X, y).predict(X)
array([3, 3, 3])
62 def fit(self, X, y=None): 63 """ 64 Fit the Random Fourier feature transformer and the estimator. 65 """ 66 X = check_array(X) 67 68 # Initialize and fit the Random Fourier Feature transformer 69 self.rff_ = RBFSampler( 70 n_components=self.n_components, 71 gamma=self.gamma, 72 random_state=self.random_state, 73 ) 74 X_transformed = self.rff_.fit_transform(X) 75 76 # Fit the underlying estimator on the transformed data 77 self.estimator.fit(X_transformed, y) 78 79 return self
Fit the Random Fourier feature transformer and the estimator.
110 def predict(self, X): 111 """ 112 Predict using the Random Fourier transformed data. 113 """ 114 check_is_fitted(self, ["rff_"]) 115 X = check_array(X) 116 117 # Transform the input data 118 X_transformed = self.rff_.transform(X) 119 120 # Predict using the underlying estimator 121 return self.estimator.predict(X_transformed)
Predict using the Random Fourier transformed data.
123 def predict_proba(self, X): 124 """ 125 Predict class probabilities (only for classifiers). 126 """ 127 if ( 128 not hasattr(self, "_estimator_type") 129 or self._estimator_type != "classifier" 130 ): 131 raise AttributeError( 132 "predict_proba is not available for this estimator type." 133 ) 134 135 check_is_fitted(self, ["rff_"]) 136 X = check_array(X) 137 138 if not hasattr(self.estimator, "predict_proba"): 139 raise ValueError( 140 f"The estimator {type(self.estimator).__name__} does not support predict_proba." 141 ) 142 143 # Transform the input data 144 X_transformed = self.rff_.transform(X) 145 146 # Predict probabilities using the underlying estimator 147 return self.estimator.predict_proba(X_transformed)
Predict class probabilities (only for classifiers).
199 def score(self, X, y): 200 """ 201 Evaluate the model performance. 202 """ 203 check_is_fitted(self, ["rff_"]) 204 X = check_array(X) 205 206 # Transform the input data 207 X_transformed = self.rff_.transform(X) 208 209 # Evaluate using the underlying estimator's score method 210 return self.estimator.score(X_transformed, y)
Evaluate the model performance.
class RandomFourierFeaturesRidge(BaseEstimator, RegressorMixin):
    """
    Random Fourier Features with Bayesian Ridge Regression.

    Implements both standard (MLE) and Bayesian versions with uncertainty quantification.
    Uses data augmentation for L2 regularization via jnp.lstsq.
    """

    def __init__(
        self,
        n_features: int = 100,
        gamma: float = 1.0,
        alpha: float = 1e-6,
        include_bias: bool = True,
        random_seed: int = 42,
    ):
        """
        Parameters:
        -----------
        n_features : int
            Number of random Fourier features (D)
        gamma : float
            RBF kernel parameter: k(x,y) = exp(-gamma * ||x-y||²)
        alpha : float
            Prior precision (inverse variance) for Bayesian version
            Equivalent to regularization strength: lambda = alpha / beta
        include_bias : bool
            Whether to include a bias term
        random_seed : int
            Random seed for reproducibility
        """

        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        self.n_features = n_features
        self.gamma = gamma
        self.alpha = alpha
        self.include_bias = include_bias
        self.key = random.PRNGKey(random_seed)
        self.is_fitted = False

        # Bayesian parameters (populated by fit)
        self.beta = None  # Noise precision (will be estimated from data)
        self.w_mean = None  # Posterior mean of weights
        self.w_cov = None  # Posterior covariance of weights
        self.S_N = None  # Posterior precision matrix

    def _compute_random_features(self, X, W, b):
        """Compute random Fourier features: sqrt(2/D) * cos(XW + b)"""
        projection = jnp.dot(X, W) + b  # Shape: (n_samples, n_features)
        features = jnp.sqrt(2.0 / self.n_features) * jnp.cos(projection)

        if self.include_bias:
            # Prepend a constant-1 column acting as the bias basis function.
            features = jnp.concatenate(
                [jnp.ones((X.shape[0], 1)), features], axis=1
            )

        return features

    def _init_random_weights(self, input_dim):
        """Initialize random weights and biases for RFF"""
        # Sample from Gaussian distribution for RBF kernel
        # Variance = 2 * gamma for RBF kernel
        self.key, subkey = random.split(self.key)
        W = random.normal(
            subkey, shape=(input_dim, self.n_features)
        ) * jnp.sqrt(2.0 * self.gamma)

        # Phase offsets uniform in [0, 2π).
        self.key, subkey = random.split(self.key)
        b = random.uniform(
            subkey, shape=(1, self.n_features), minval=0, maxval=2 * jnp.pi
        )

        return W, b

    def fit(
        self,
        X,
        y,
        method="bayesian",
        noise_variance=None,
    ):
        """
        Fit the model using either standard or Bayesian ridge regression.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Training data
        y : array-like, shape (n_samples,) or (n_samples, n_targets)
            Target values
        method : str, either "standard" or "bayesian"
            "standard": Maximum likelihood estimation with L2 regularization
            "bayesian": Full Bayesian inference with uncertainty quantification
        noise_variance : float, optional
            If provided, fixes the noise variance instead of estimating it
        """
        # Convert to JAX arrays if needed
        X = jnp.asarray(X)
        y = jnp.asarray(y)

        if len(y.shape) == 1:
            y = y.reshape(-1, 1)

        n_samples, input_dim = X.shape

        # Initialize random Fourier weights
        self.W, self.b = self._init_random_weights(input_dim)

        # Compute random Fourier features
        Phi = self._compute_random_features(X, self.W, self.b)
        n_basis = Phi.shape[1]  # D + 1 if bias included

        # Store feature matrix and target values for Bayesian updates/likelihood computation
        self.Phi_train = Phi
        self.y_train = y  # Store y_train

        if method == "standard":
            # Standard ridge regression using data augmentation for regularization
            self._fit_standard(Phi, y)
        elif method == "bayesian":
            # Bayesian ridge regression
            self._fit_bayesian(Phi, y, noise_variance)
        else:
            raise ValueError("method must be 'standard' or 'bayesian'")

        self.is_fitted = True
        self.method = method
        self.input_dim = input_dim

        return self

    def _fit_standard(self, Phi, y) -> None:
        """Standard ridge regression using lstsq with data augmentation"""
        n_samples, n_basis = Phi.shape

        # Create augmented data for L2 regularization:
        # appending sqrt(alpha)*I rows makes plain least squares solve
        # min ||Phi w - y||² + alpha ||w||².
        sqrt_alpha = jnp.sqrt(self.alpha)
        Phi_aug = jnp.vstack([Phi, sqrt_alpha * jnp.eye(n_basis)])
        y_aug = jnp.vstack([y, jnp.zeros((n_basis, y.shape[1]))])

        # Solve using least squares
        # Note: jnp.linalg.lstsq is more stable than explicit normal equations
        weights, residuals, rank, s = jnp.linalg.lstsq(
            Phi_aug, y_aug, rcond=None
        )

        self.w_mean = weights
        self.weights = weights  # For compatibility

        # Estimate noise variance from residuals
        residuals = y - Phi @ weights
        self.beta = 1.0 / jnp.maximum(jnp.var(residuals), 1e-8)

    def _fit_bayesian(
        self,
        Phi,
        y,
        noise_variance=None,
    ) -> None:
        """Bayesian ridge regression with evidence approximation"""
        n_samples, n_basis = Phi.shape

        # Initialize precision parameters
        if noise_variance is not None:
            self.beta = 1.0 / noise_variance
        else:
            # Initial estimate of beta from data
            self.beta = 1.0 / jnp.maximum(jnp.var(y), 1e-8)

        # Posterior precision matrix: S_N⁻¹ = alpha * I + beta * ΦᵀΦ
        I = jnp.eye(n_basis)
        PhiT_Phi = Phi.T @ Phi

        # Initialize with prior
        S_N_inv = self.alpha * I

        # Evidence approximation to optimize alpha, beta
        for _ in range(10):  # Iterate to converge on alpha, beta
            # Update posterior mean and covariance
            S_N = jnp.linalg.inv(S_N_inv + self.beta * PhiT_Phi)
            self.w_mean = self.beta * S_N @ Phi.T @ y

            # Update gamma (effective number of parameters)
            # NOTE(review): the textbook form uses beta*eigenvalues in the
            # ratio — confirm omitting beta here is intended
            eigenvalues = jnp.linalg.eigvalsh(PhiT_Phi)
            gamma_val = jnp.sum(eigenvalues / (self.alpha + eigenvalues))

            # Update alpha and beta (MacKay's fixed point updates)
            if self.alpha > 0:
                self.alpha = gamma_val / jnp.sum(self.w_mean**2)

            if noise_variance is None:
                residuals = y - Phi @ self.w_mean
                self.beta = (n_samples - gamma_val) / jnp.sum(residuals**2)

            # Update precision matrix
            # NOTE(review): resets the precision to the prior term only; the
            # beta*ΦᵀΦ part is re-added at the top of the loop — confirm
            S_N_inv = self.alpha * I

        # Store final covariance
        self.S_N = jnp.linalg.inv(self.alpha * I + self.beta * PhiT_Phi)
        self.w_cov = self.S_N

        # Also store for compatibility
        self.weights = self.w_mean

    def transform(self, X):
        """Transform input data to random Fourier feature space"""
        if not self.is_fitted:
            raise ValueError("Model must be fitted before transforming")

        X = jnp.asarray(X)
        return self._compute_random_features(X, self.W, self.b)

    def predict(
        self,
        X,
        return_std=False,
        return_cov=False,
    ):
        """
        Make predictions, optionally with uncertainty quantification.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Input data
        return_std : bool
            If True, return standard deviation of predictive distribution
        return_cov : bool
            If True, return full covariance matrix of predictive distribution

        Returns:
        --------
        y_pred : jnp.ndarray
            Predictive mean
        y_std or y_cov : jnp.ndarray, optional
            Predictive standard deviation or covariance
        """
        if not self.is_fitted:
            raise ValueError("Model must be fitted before prediction")

        X = jnp.asarray(X)
        Phi = self.transform(X)

        # Predictive mean
        y_pred = Phi @ self.w_mean

        if not return_std and not return_cov:
            return y_pred

        if self.method != "bayesian":
            raise ValueError(
                "Uncertainty quantification only available for Bayesian method"
            )

        # Predictive variance
        if return_cov:
            # Full predictive covariance
            # Σ_pred = (1/β) * I + Φ @ S_N @ Φᵀ
            pred_cov = (1.0 / self.beta) * jnp.eye(
                Phi.shape[0]
            ) + Phi @ self.S_N @ Phi.T
            return y_pred, pred_cov
        else:
            # Diagonal of predictive covariance (standard deviations)
            # σ²_pred = (1/β) + diag(Φ @ S_N @ Φᵀ)
            var_diag = (1.0 / self.beta) + jnp.sum(
                (Phi @ self.S_N) * Phi, axis=1
            )
            y_std = jnp.sqrt(jnp.maximum(var_diag, 0.0)).reshape(-1, 1)
            return y_pred, y_std

    def sample_posterior(
        self,
        X,
        n_samples=1,
        key=None,
    ):
        """
        Sample from the posterior predictive distribution.

        Parameters:
        -----------
        X : array-like
            Input data
        n_samples : int
            Number of samples to draw
        key : PRNGKey, optional
            Random key for sampling

        Returns:
        --------
        samples : jnp.ndarray, shape (n_samples, n_test_samples)
            Samples from posterior predictive distribution
        """
        if self.method != "bayesian":
            raise ValueError("Sampling only available for Bayesian method")

        if key is None:
            key = self.key

        X = jnp.asarray(X)
        Phi = self.transform(X)
        n_test = Phi.shape[0]  # NOTE(review): unused local

        # Sample weights from posterior
        key, subkey = random.split(key)
        w_samples = random.multivariate_normal(
            subkey, self.w_mean.flatten(), self.S_N, shape=(n_samples,)
        )

        # Generate predictions for each weight sample
        samples = []
        for i in range(n_samples):
            w_sample = w_samples[i].reshape(-1, 1)
            # Add noise variance (subkey1 is split but unused — NOTE(review))
            key, subkey1, subkey2 = random.split(key, 3)
            pred_mean = Phi @ w_sample
            noise = random.normal(subkey2, shape=pred_mean.shape) / jnp.sqrt(
                self.beta
            )
            samples.append(pred_mean + noise)

        return jnp.stack(samples, axis=0)

    def log_marginal_likelihood(self) -> float:
        """
        Compute log marginal likelihood (evidence) for Bayesian model.

        Returns:
        --------
        log_evidence : float
            Log marginal likelihood p(y|X,α,β)
        """
        if self.method != "bayesian":
            raise ValueError(
                "Log marginal likelihood only available for Bayesian method"
            )

        n_samples = self.Phi_train.shape[0]
        n_basis = self.Phi_train.shape[1]

        # Log determinant term
        I = jnp.eye(n_basis)
        A = self.alpha * I + self.beta * self.Phi_train.T @ self.Phi_train
        sign, logdet_A = jnp.linalg.slogdet(A)
        logdet_term = 0.5 * (n_basis * jnp.log(self.alpha) - logdet_A)

        # Data fit term
        residuals = self.y_train - self.Phi_train @ self.w_mean
        data_fit_term = -0.5 * self.beta * jnp.sum(residuals**2)

        # Constant term
        const_term = 0.5 * n_samples * jnp.log(self.beta / (2 * jnp.pi))

        return float(logdet_term + data_fit_term + const_term)

    def get_params(self) -> Dict:
        """Get model parameters"""
        # NOTE(review): signature lacks sklearn's `deep` parameter — confirm
        # sklearn tooling compatibility is not required here
        return {
            "n_features": self.n_features,
            "gamma": self.gamma,
            "alpha": self.alpha,
            "beta": self.beta if self.beta is not None else None,
            "method": self.method if hasattr(self, "method") else None,
            "input_dim": self.input_dim if hasattr(self, "input_dim") else None,
        }

    def set_params(self, **params) -> "RandomFourierFeaturesRidge":
        """Set model parameters"""
        # Only known attributes are updated; unknown keys are silently ignored.
        for key, value in params.items():
            if hasattr(self, key):
                setattr(self, key, value)
        return self
Random Fourier Features with Bayesian Ridge Regression.
Implements both standard (MLE) and Bayesian versions with uncertainty quantification. Uses data augmentation for L2 regularization via jnp.lstsq.
95 def fit( 96 self, 97 X, 98 y, 99 method="bayesian", 100 noise_variance=None, 101 ): 102 """ 103 Fit the model using either standard or Bayesian ridge regression. 104 105 Parameters: 106 ----------- 107 X : array-like, shape (n_samples, n_features) 108 Training data 109 y : array-like, shape (n_samples,) or (n_samples, n_targets) 110 Target values 111 method : str, either "standard" or "bayesian" 112 "standard": Maximum likelihood estimation with L2 regularization 113 "bayesian": Full Bayesian inference with uncertainty quantification 114 noise_variance : float, optional 115 If provided, fixes the noise variance instead of estimating it 116 """ 117 # Convert to JAX arrays if needed 118 X = jnp.asarray(X) 119 y = jnp.asarray(y) 120 121 if len(y.shape) == 1: 122 y = y.reshape(-1, 1) 123 124 n_samples, input_dim = X.shape 125 126 # Initialize random Fourier weights 127 self.W, self.b = self._init_random_weights(input_dim) 128 129 # Compute random Fourier features 130 Phi = self._compute_random_features(X, self.W, self.b) 131 n_basis = Phi.shape[1] # D + 1 if bias included 132 133 # Store feature matrix and target values for Bayesian updates/likelihood computation 134 self.Phi_train = Phi 135 self.y_train = y # Store y_train 136 137 if method == "standard": 138 # Standard ridge regression using data augmentation for regularization 139 self._fit_standard(Phi, y) 140 elif method == "bayesian": 141 # Bayesian ridge regression 142 self._fit_bayesian(Phi, y, noise_variance) 143 else: 144 raise ValueError("method must be 'standard' or 'bayesian'") 145 146 self.is_fitted = True 147 self.method = method 148 self.input_dim = input_dim 149 150 return self
Fit the model using either standard or Bayesian ridge regression.
Parameters:
X : array-like, shape (n_samples, n_features) — training data. y : array-like, shape (n_samples,) or (n_samples, n_targets) — target values. method : str — either "standard" (maximum-likelihood estimation with L2 regularization) or "bayesian" (full Bayesian inference with uncertainty quantification). noise_variance : float, optional — if provided, fixes the noise variance instead of estimating it.
def predict(
    self,
    X,
    return_std=False,
    return_cov=False,
):
    """
    Make predictions, optionally with uncertainty quantification.

    Parameters:
    -----------
    X : array-like, shape (n_samples, n_features)
        Input data
    return_std : bool
        If True, also return the standard deviation of the predictive
        distribution
    return_cov : bool
        If True, also return the full covariance matrix of the
        predictive distribution

    Returns:
    --------
    y_pred : jnp.ndarray
        Predictive mean
    y_std or y_cov : jnp.ndarray, optional
        Predictive standard deviation or covariance
    """
    if not self.is_fitted:
        raise ValueError("Model must be fitted before prediction")

    Phi = self.transform(jnp.asarray(X))

    # Point prediction: posterior (or MLE) mean of the weights.
    mean = Phi @ self.w_mean

    if not (return_std or return_cov):
        return mean

    # Uncertainty is only defined when a posterior was computed.
    if self.method != "bayesian":
        raise ValueError(
            "Uncertainty quantification only available for Bayesian method"
        )

    if return_cov:
        # Full predictive covariance: (1/β) I + Φ S_N Φᵀ
        noise_cov = (1.0 / self.beta) * jnp.eye(Phi.shape[0])
        return mean, noise_cov + Phi @ self.S_N @ Phi.T

    # Per-point predictive variance: (1/β) + diag(Φ S_N Φᵀ)
    var = (1.0 / self.beta) + jnp.sum((Phi @ self.S_N) * Phi, axis=1)
    std = jnp.sqrt(jnp.maximum(var, 0.0)).reshape(-1, 1)
    return mean, std
Make predictions, optionally with uncertainty quantification.
Parameters:
X : array-like, shape (n_samples, n_features) Input data return_std : bool If True, return standard deviation of predictive distribution return_cov : bool If True, return full covariance matrix of predictive distribution
Returns:
y_pred : jnp.ndarray Predictive mean y_std or y_cov : jnp.ndarray, optional Predictive standard deviation or covariance
class RandomFourierFeaturesRidgeGCV(RandomFourierFeaturesRidge):
    """
    Random Fourier Features ridge with Generalized Cross-Validation (GCV).

    Extends RandomFourierFeaturesRidge with automatic selection of the
    regularization parameter alpha by minimizing the GCV score, computed
    efficiently from a single SVD of the random-feature design matrix.
    """

    def __init__(
        self,
        n_features: int = 100,
        gamma: float = 1.0,
        alpha: Optional[float] = None,
        include_bias: bool = True,
        random_seed: int = 42,
    ):
        super().__init__(n_features, gamma, alpha, include_bias, random_seed)
        self.alpha_opt = None  # GCV-optimized alpha (set by fit_gcv*)
        self.gcv_score = None  # GCV score at the optimum

    def _compute_gcv(
        self,
        alpha,
        s_sq,
        U,
        y,
        n_samples,
    ):
        """
        Compute the GCV score for a given alpha.

        Parameters:
        -----------
        alpha : float
            Regularization parameter
        s_sq : jnp.ndarray, shape (k,)
            Squared singular values of the design matrix Φ
        U : jnp.ndarray, shape (n_samples, k)
            Left singular vectors of Φ
        y : jnp.ndarray, shape (n_samples, n_targets)
            Target values (always 2-D at the call sites)
        n_samples : int
            Number of data points

        Returns:
        --------
        gcv : float
            GCV score for this alpha
        """
        # Effective degrees of freedom: df(α) = Σ σ_j² / (σ_j² + α)
        df = jnp.sum(s_sq / (s_sq + alpha))

        # Fitted values via the SVD: y_pred = U diag(σ²/(σ²+α)) Uᵀ y
        Uty = U.T @ y
        shrinkage = s_sq / (s_sq + alpha)
        # BUGFIX: `shrinkage` has shape (k,) while `Uty` is (k, n_targets);
        # the original `shrinkage * Uty` broadcast (k,) against (k, 1) into
        # a (k, k) matrix, so the residuals and the GCV score were wrong.
        # `fit_gcv_with_path` already used the reshape(-1, 1) form.
        y_pred = U @ (shrinkage.reshape(-1, 1) * Uty)
        residuals = y - y_pred
        rss = jnp.sum(residuals**2)

        # GCV(α) = (RSS / n) / (1 - df/n)²
        denom = (1.0 - df / n_samples) ** 2
        gcv = (rss / n_samples) / denom

        return float(gcv)

    def fit_gcv(
        self,
        X,
        y,
        alpha_range: Tuple[float, float] = (1e-8, 1e4),
        n_alphas: int = 50,
        method: str = "standard",
        optimize: bool = True,
    ) -> "RandomFourierFeaturesRidgeGCV":
        """
        Fit model with GCV-optimized regularization parameter.

        Parameters:
        -----------
        X : array-like
            Training data
        y : array-like
            Target values
        alpha_range : tuple
            (min_alpha, max_alpha) range to search
        n_alphas : int
            Number of alpha values to try in initial grid search
        method : str
            "standard" or "bayesian"
        optimize : bool
            If True, perform fine optimization after grid search

        Returns:
        --------
        self : fitted model
        """
        X = jnp.asarray(X)
        y = jnp.asarray(y)

        if len(y.shape) == 1:
            y = y.reshape(-1, 1)

        n_samples, input_dim = X.shape

        # Random Fourier features of the training inputs.
        self.W, self.b = self._init_random_weights(input_dim)
        Phi = self._compute_random_features(X, self.W, self.b)

        # One SVD of Φ makes every subsequent GCV evaluation cheap.
        U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False)
        s_sq = S**2

        # Coarse grid search on a log scale.
        alphas_grid = jnp.logspace(
            jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas
        )
        gcv_scores = [
            self._compute_gcv(float(alpha), s_sq, U, y, n_samples)
            for alpha in alphas_grid
        ]

        best_idx = jnp.argmin(jnp.array(gcv_scores))
        alpha_grid_opt = float(alphas_grid[best_idx])

        if optimize:
            # Fine 1-D optimization in log10(alpha) space (Brent, bounded).
            def gcv_objective(log_alpha):
                alpha = 10**log_alpha
                return self._compute_gcv(alpha, s_sq, U, y, n_samples)

            result = minimize_scalar(
                gcv_objective,
                bounds=(jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1])),
                method="bounded",
                options={"xatol": 0.1},  # tolerance in log10 space
            )

            if result.success:
                alpha_opt = 10**result.x
                gcv_opt = result.fun
            else:
                # Fall back to the grid optimum if the optimizer failed.
                alpha_opt = alpha_grid_opt
                gcv_opt = gcv_scores[best_idx]
        else:
            alpha_opt = alpha_grid_opt
            gcv_opt = gcv_scores[best_idx]

        # Store optimized parameters.
        self.alpha_opt = alpha_opt
        self.gcv_score = gcv_opt
        self.alpha = alpha_opt  # used by the final fit below

        # Fit final model with the optimized alpha.
        if method == "standard":
            self._fit_standard(Phi, y)
        elif method == "bayesian":
            # For the Bayesian version, alpha acts as the prior precision.
            self._fit_bayesian(Phi, y)
        else:
            raise ValueError("method must be 'standard' or 'bayesian'")

        self.is_fitted = True
        self.method = method
        self.input_dim = input_dim

        return self

    def fit_gcv_with_path(
        self,
        X,
        y,
        alpha_range: Tuple[float, float] = (1e-8, 1e4),
        n_alphas: int = 100,
        method: str = "standard",
    ) -> dict:
        """
        Fit with GCV and return the full regularization path.

        Returns:
        --------
        path_info : dict
            Dictionary with alpha values, GCV scores, training errors,
            effective degrees of freedom, and the selected optimum
        """
        X = jnp.asarray(X)
        y = jnp.asarray(y)

        if len(y.shape) == 1:
            y = y.reshape(-1, 1)

        n_samples, input_dim = X.shape

        # Initialize random features.
        self.W, self.b = self._init_random_weights(input_dim)
        Phi = self._compute_random_features(X, self.W, self.b)

        # Compute SVD once; reused for every alpha on the path.
        U, S, Vt = jnp.linalg.svd(Phi, full_matrices=False)
        s_sq = S**2

        alphas = jnp.logspace(
            jnp.log10(alpha_range[0]), jnp.log10(alpha_range[1]), n_alphas
        )

        gcv_scores = []
        train_errors = []
        effective_dof = []

        for alpha in alphas:
            alpha_val = float(alpha)

            # GCV score.
            gcv_scores.append(
                self._compute_gcv(alpha_val, s_sq, U, y, n_samples)
            )

            # Effective degrees of freedom.
            df = float(jnp.sum(s_sq / (s_sq + alpha_val)))
            effective_dof.append(df)

            # Training error: ridge weights via the SVD,
            # w = V diag(σ/(σ²+α)) Uᵀ y
            Uty = U.T @ y
            shrinkage = S / (s_sq + alpha_val)
            w_alpha = Vt.T @ (shrinkage.reshape(-1, 1) * Uty)
            y_pred = Phi @ w_alpha
            train_errors.append(float(jnp.mean((y - y_pred) ** 2)))

        # Find optimal alpha.
        best_idx = jnp.argmin(jnp.array(gcv_scores))
        alpha_opt = float(alphas[best_idx])

        # Fit final model with the optimal alpha.
        self.alpha = alpha_opt
        if method == "standard":
            self._fit_standard(Phi, y)
        elif method == "bayesian":
            self._fit_bayesian(Phi, y)
        else:
            # Consistency fix: previously an invalid `method` silently
            # skipped fitting yet still marked the model as fitted.
            raise ValueError("method must be 'standard' or 'bayesian'")

        self.is_fitted = True
        self.method = method
        self.input_dim = input_dim
        self.alpha_opt = alpha_opt
        self.gcv_score = gcv_scores[best_idx]

        path_info = {
            "alphas": np.array(alphas),
            "gcv_scores": np.array(gcv_scores),
            "train_errors": np.array(train_errors),
            "effective_dof": np.array(effective_dof),
            "alpha_opt": alpha_opt,
            "gcv_opt": gcv_scores[best_idx],
            "dof_opt": effective_dof[best_idx],
        }

        return path_info

    def plot_gcv_path(self, path_info: dict, save_path: str = None):
        """
        Plot the GCV regularization path (4 panels: GCV vs α, training
        error vs α, effective DOF vs α, GCV vs DOF).

        Parameters:
        -----------
        path_info : dict
            Output of `fit_gcv_with_path`
        save_path : str, optional
            If given, the figure is also saved to this path
        """
        import matplotlib.pyplot as plt

        fig, axes = plt.subplots(2, 2, figsize=(12, 10))

        # Plot 1: GCV score vs alpha.
        ax = axes[0, 0]
        ax.semilogx(
            path_info["alphas"], path_info["gcv_scores"], "b-", linewidth=2
        )
        ax.axvline(
            path_info["alpha_opt"],
            color="r",
            linestyle="--",
            label=f'Optimal α = {path_info["alpha_opt"]:.2e}',
        )
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("GCV Score")
        ax.set_title("GCV Score vs Regularization")
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 2: Training error vs alpha.
        ax = axes[0, 1]
        ax.loglog(
            path_info["alphas"], path_info["train_errors"], "g-", linewidth=2
        )
        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("Training MSE")
        ax.set_title("Training Error vs Regularization")
        ax.grid(True, alpha=0.3)

        # Plot 3: Effective DOF vs alpha.
        ax = axes[1, 0]
        ax.semilogx(
            path_info["alphas"], path_info["effective_dof"], "m-", linewidth=2
        )
        ax.axvline(path_info["alpha_opt"], color="r", linestyle="--")
        ax.axhline(
            path_info["dof_opt"],
            color="r",
            linestyle=":",
            label=f'DOF at optimum = {path_info["dof_opt"]:.1f}',
        )
        ax.set_xlabel("Regularization α")
        ax.set_ylabel("Effective Degrees of Freedom")
        ax.set_title("Model Complexity vs Regularization")
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 4: GCV vs DOF.
        ax = axes[1, 1]
        ax.plot(
            path_info["effective_dof"],
            path_info["gcv_scores"],
            "k-",
            linewidth=2,
        )
        ax.axvline(path_info["dof_opt"], color="r", linestyle="--")
        ax.set_xlabel("Effective Degrees of Freedom")
        ax.set_ylabel("GCV Score")
        ax.set_title("GCV vs Model Complexity")
        ax.grid(True, alpha=0.3)

        plt.suptitle(
            "GCV Regularization Path Analysis", fontsize=14, fontweight="bold"
        )
        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=150, bbox_inches="tight")

        plt.show()
Extends RandomFourierFeaturesRidge with GCV for automatic regularization parameter selection.
class RegressorUpdater(BaseEstimator, RegressorMixin):
    """
    Update a regression model with new observations

    Parameters
    ----------
    regr: object
        A regression model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    def __init__(self, regr, alpha=0.5):
        self.regr = regr
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # If the wrapped model is already fitted, pick up its coefficients
        # (and, for nnetsauce models, the number of samples already seen).
        try:
            self.coef_ = self.regr.coef_
            if isinstance(self.regr, Base):
                self.n_obs_ = self.regr.scaler_.n_samples_seen_
        except AttributeError:
            pass

    def fit(self, X, y, **kwargs):
        """Fit the wrapped regressor if needed and record n_obs_ / coef_."""
        if isinstance(
            self.regr, CustomRegressor
        ):  # nnetsauce model not deep ---
            # NOTE(review): this assumes check_is_fitted returns a boolean;
            # sklearn's check_is_fitted returns None or raises — confirm this
            # is the project-local helper.
            if check_is_fitted(self.regr) == False:
                self.regr.fit(X, y, **kwargs)
                self.n_obs_ = X.shape[0]
                if hasattr(self.regr, "coef_"):
                    self.coef_ = self.regr.coef_
                return self
            self.n_obs_ = self.regr.scaler_.n_samples_seen_
            if hasattr(self.regr, "coef_"):
                self.coef_ = self.regr.coef_
            return self

        if not hasattr(self.regr, "coef_"):  # unfitted sklearn-style model ---
            # BUGFIX: the original called self.regr.fit(X, y) twice in a row;
            # the redundant second fit has been removed.
            self.regr.fit(X, y)
            self.n_obs_ = X.shape[0]
            if hasattr(self.regr, "stacked_obj"):
                self.coef_ = self.regr.stacked_obj.coef_
            else:
                self.coef_ = self.regr.coef_
            return self

        # Already fitted: just refresh the bookkeeping.
        self.n_obs_ = X.shape[0]
        if hasattr(self.regr, "coef_"):
            self.coef_ = self.regr.coef_
        return self

    def predict(self, X):
        """Delegate prediction to the wrapped regressor."""
        return self.regr.predict(X)

    def partial_fit(self, X, y):
        """Online update of coef_ from a single new observation (X, y)."""
        assert hasattr(
            self.regr, "coef_"
        ), "model must be fitted first (i.e have 'coef_' attribute)"
        assert (
            self.n_obs_ is not None
        ), "model must be fitted first (i.e have 'n_obs_' attribute)"

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        assert X.shape[0] == 1, "X must have one row"

        # Step size decays with the number of observations seen.
        self.updating_factor_ = self.n_obs_ ** (-self.alpha)

        if isinstance(self.regr, Base):  # nnetsauce model ---
            newX = deepcopy(X)

            if isinstance(
                self.regr, CustomRegressor
            ):  # other nnetsauce model (CustomRegressor) ---
                newX = self.regr.cook_test_set(X=X)

            # BUGFIX: the original only assigned `newx` inside the
            # CustomRegressor branch, so a Base model that is not a
            # CustomRegressor hit a NameError below.
            if isinstance(X, pd.DataFrame):
                # assumes newX exposes .values here — TODO confirm what
                # cook_test_set returns for DataFrame inputs
                newx = newX.values.ravel()
            else:
                newx = newX.ravel()

        else:  # an sklearn model ---
            if isinstance(X, pd.DataFrame):
                newx = X.values.ravel()
            else:
                newx = X.ravel()

        # Stochastic-approximation style coefficient update, then averaged
        # into the running mean of coefficients.
        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
            newx, y - np.dot(newx, self.regr.coef_)
        )
        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
        self.coef_ = deepcopy(self.regr.coef_)
        self.n_obs_ += 1
        return self
Update a regression model with new observations
Parameters
regr: object A regression model with a coef_ attribute alpha: float Updating factor's exponent
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
def fit(self, X, y, **kwargs):
    """Fit the wrapped regressor (when needed) and record its state."""
    if isinstance(
        self.regr, CustomRegressor
    ):  # nnetsauce model not deep ---
        if check_is_fitted(self.regr) == False:
            # Not fitted yet: train on (X, y) and count these samples.
            self.regr.fit(X, y, **kwargs)
            self.n_obs_ = X.shape[0]
        else:
            # Already fitted: recover the sample count from its scaler.
            self.n_obs_ = self.regr.scaler_.n_samples_seen_
        if hasattr(self.regr, "coef_"):
            self.coef_ = self.regr.coef_
        return self

    if not hasattr(
        self.regr, "coef_"
    ):  # sklearn model or CustomRegressor model ---
        self.regr.fit(X, y)
        self.n_obs_ = X.shape[0]
        # NOTE(review): this second fit looks redundant — confirm intent.
        self.regr.fit(X, y)
        if hasattr(self.regr, "stacked_obj"):
            self.coef_ = self.regr.stacked_obj.coef_
        else:
            self.coef_ = self.regr.coef_
        return self

    self.n_obs_ = X.shape[0]
    if hasattr(self.regr, "coef_"):
        self.coef_ = self.regr.coef_
    return self
class ClassifierUpdater(BaseEstimator, ClassifierMixin):
    """
    Update a classification model with new observations (not implemented yet).

    All public methods currently raise NotImplementedError; the draft
    implementation that followed each raise was unreachable and has been
    removed.

    Parameters
    ----------
    clf: object
        A classification model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    _estimator_type = "classifier"

    def __init__(self, clf, alpha=0.5):
        self.clf = clf
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # Pick up state from an already-fitted classifier, if available.
        try:
            self.coef_ = self.clf.coef_
            if isinstance(self.clf, Base):
                self.n_obs_ = self.clf.scaler_.n_samples_seen_
        except AttributeError:
            pass

    def fit(self, X, y, **kwargs):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "fit method is not implemented for ClassifierUpdater"
        )

    def predict(self, X):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "predict method is not implemented for ClassifierUpdater"
        )

    def partial_fit(self, X, y):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError(
            "partial_fit method is not implemented for ClassifierUpdater"
        )
Update a classification model with new observations
Parameters
clf: object — a classification model with a coef_ attribute. alpha: float — the updating factor's exponent.
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
def fit(self, X, y, **kwargs):
    """Not implemented for ClassifierUpdater.

    Raises
    ------
    NotImplementedError
        Always. The original draft body that followed this raise was
        unreachable dead code and has been removed.
    """
    raise NotImplementedError(
        "fit method is not implemented for ClassifierUpdater"
    )
class RidgeRegressor(BaseEstimator, RegressorMixin):
    """Ridge regression with a single regularization parameter.

    Attributes:

        reg_lambda: float
            regularization parameter.

        backend: str
            type of backend; must be in ('cpu', 'gpu', 'tpu')

    """

    def __init__(self, reg_lambda=0.1, backend="cpu"):
        assert backend in (
            "cpu",
            "gpu",
            "tpu",
        ), "`backend` must be in ('cpu', 'gpu', 'tpu')"

        if not JAX_AVAILABLE and backend != "cpu":
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        sys_platform = platform.system()

        # GPU/TPU execution is not supported on Windows; fall back to CPU.
        if (sys_platform == "Windows") and (backend in ("gpu", "tpu")):
            warnings.warn(
                "No GPU/TPU computing on Windows yet, backend set to 'cpu'"
            )
            backend = "cpu"

        self.reg_lambda = reg_lambda
        self.backend = backend
        self.coef_ = None

    def fit(self, X, y, **kwargs):
        """Fit the ridge regressor to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters (currently unused).

        Returns:

            self: object.

        """
        # Center the response and standardize the inputs; the statistics
        # are stored for use at prediction time.
        self.ym, centered_y = mo.center_response(y)
        self.xm = X.mean(axis=0)
        self.xsd = X.std(axis=0)
        self.xsd[self.xsd == 0] = 1  # avoid division by zero
        X_ = (X - self.xm[None, :]) / self.xsd[None, :]

        if self.backend == "cpu":
            if len(centered_y.shape) <= 1:
                # Single response: ridge via data augmentation — append
                # sqrt(lambda) * I rows and zero targets, then solve LS.
                # (np.vstack replaces the deprecated np.row_stack alias.)
                eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
                X_ = np.vstack((X_, eye_term))
                y_ = np.concatenate((centered_y, np.zeros(X.shape[1])))
                beta_info = get_beta(X_, y_)
                self.coef_ = beta_info[0]
            else:
                try:
                    eye_term = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
                    X_ = np.vstack((X_, eye_term))
                    y_ = np.vstack(
                        (
                            centered_y,
                            np.zeros((eye_term.shape[0], centered_y.shape[1])),
                        )
                    )
                    beta_info = get_beta(X_, y_)
                    self.coef_ = beta_info[0]
                except Exception:
                    # Fallback: closed-form normal equations.
                    x = inv(
                        mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
                    )
                    hat_matrix = mo.tcrossprod(x, X_)
                    self.coef_ = mo.safe_sparse_dot(hat_matrix, centered_y)
            return self

        # gpu/tpu: closed-form ridge solution with JAX.
        x = jinv(
            mo.crossprod(X_, backend=self.backend)
            + self.reg_lambda * jnp.eye(X_.shape[1])
        )

        hat_matrix = mo.tcrossprod(x, X_, backend=self.backend)
        self.coef_ = mo.safe_sparse_dot(
            hat_matrix, centered_y, backend=self.backend
        )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to predict for, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to `predict_proba`

        Returns:

            model predictions: {array-like}

        """
        # Apply the training-time standardization.
        X_ = (X - self.xm[None, :]) / self.xsd[None, :]

        if self.backend == "cpu":
            # self.ym is a scalar for single-output fits, an array otherwise.
            if isinstance(self.ym, float):
                return self.ym + mo.safe_sparse_dot(X_, self.coef_)
            return self.ym[None, :] + mo.safe_sparse_dot(X_, self.coef_)

        # gpu/tpu backends
        if isinstance(self.ym, float):
            return self.ym + mo.safe_sparse_dot(
                X_, self.coef_, backend=self.backend
            )
        return self.ym[None, :] + mo.safe_sparse_dot(
            X_, self.coef_, backend=self.backend
        )
Ridge.
Attributes:
reg_lambda: float
regularization parameter.
backend: str
type of backend; must be in ('cpu', 'gpu', 'tpu')
def fit(self, X, y, **kwargs):
    """Fit the ridge regressor on training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to self.cook_training_set.

    Returns:

        self: object.

    """
    # Center the response and standardize the inputs.
    self.ym, centered_y = mo.center_response(y)
    self.xm = X.mean(axis=0)
    self.xsd = X.std(axis=0)
    self.xsd[self.xsd == 0] = 1  # guard against constant columns
    X_ = (X - self.xm[None, :]) / self.xsd[None, :]

    if self.backend != "cpu":
        # JAX path (gpu/tpu): closed-form ridge solution.
        gram_inv = jinv(
            mo.crossprod(X_, backend=self.backend)
            + self.reg_lambda * jnp.eye(X_.shape[1])
        )
        projector = mo.tcrossprod(gram_inv, X_, backend=self.backend)
        self.coef_ = mo.safe_sparse_dot(
            projector, centered_y, backend=self.backend
        )
        return self

    # CPU path: ridge via data augmentation (append sqrt(lambda) I rows).
    if len(centered_y.shape) <= 1:
        ridge_rows = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
        X_aug = np.row_stack((X_, ridge_rows))
        y_aug = np.concatenate((centered_y, np.zeros(X.shape[1])))
        self.coef_ = get_beta(X_aug, y_aug)[0]
        return self

    try:
        ridge_rows = np.sqrt(self.reg_lambda) * np.eye(X.shape[1])
        X_aug = np.row_stack((X_, ridge_rows))
        y_aug = np.row_stack(
            (
                centered_y,
                np.zeros((ridge_rows.shape[0], centered_y.shape[1])),
            )
        )
        self.coef_ = get_beta(X_aug, y_aug)[0]
    except Exception:
        # Fallback: closed-form normal equations.
        gram_inv = inv(
            mo.crossprod(X_) + self.reg_lambda * np.eye(X_.shape[1])
        )
        self.coef_ = mo.safe_sparse_dot(
            mo.tcrossprod(gram_inv, X_), centered_y
        )
    return self
Fit matrixops (classifier) to training data (X, y)
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to self.cook_training_set.
Returns:
self: object.
def predict(self, X, **kwargs):
    """Predict responses for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to predict for, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to `predict_proba`

    Returns:

        model predictions: {array-like}

    """
    # Apply the standardization learned at fit time.
    X_scaled = (X - self.xm[None, :]) / self.xsd[None, :]

    # self.ym is a scalar for single-output fits, an array otherwise.
    offset = self.ym if isinstance(self.ym, float) else self.ym[None, :]

    if self.backend == "cpu":
        return offset + mo.safe_sparse_dot(X_scaled, self.coef_)

    # gpu/tpu backends
    return offset + mo.safe_sparse_dot(
        X_scaled, self.coef_, backend=self.backend
    )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to `predict_proba`
Returns:
model predictions: {array-like}
23class Ridge2Regressor(Ridge2, RegressorMixin): 24 """Ridge regression with 2 regularization parameters derived from class Ridge 25 26 Parameters: 27 28 n_hidden_features: int 29 number of nodes in the hidden layer 30 31 activation_name: str 32 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 33 34 a: float 35 hyperparameter for 'prelu' or 'elu' activation function 36 37 nodes_sim: str 38 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 39 'uniform' 40 41 bias: boolean 42 indicates if the hidden layer contains a bias term (True) or not 43 (False) 44 45 dropout: float 46 regularization parameter; (random) percentage of nodes dropped out 47 of the training 48 49 n_clusters: int 50 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 51 no clustering) 52 53 cluster_encode: bool 54 defines how the variable containing clusters is treated (default is one-hot) 55 if `False`, then labels are used, without one-hot encoding 56 57 type_clust: str 58 type of clustering method: currently k-means ('kmeans') or Gaussian 59 Mixture Model ('gmm') 60 61 type_scaling: a tuple of 3 strings 62 scaling methods for inputs, hidden layer, and clustering respectively 63 (and when relevant). 
64 Currently available: standardization ('std') or MinMax scaling ('minmax') 65 66 lambda1: float 67 regularization parameter on direct link 68 69 lambda2: float 70 regularization parameter on hidden layer 71 72 seed: int 73 reproducibility seed for nodes_sim=='uniform' 74 75 backend: str 76 'cpu' or 'gpu' or 'tpu' 77 78 Attributes: 79 80 beta_: {array-like} 81 regression coefficients 82 83 coef_: {array-like} 84 alias for `beta_`, regression coefficients 85 86 y_mean_: float 87 average response 88 89 """ 90 91 # construct the object ----- 92 93 def __init__( 94 self, 95 n_hidden_features=5, 96 activation_name="relu", 97 a=0.01, 98 nodes_sim="sobol", 99 bias=True, 100 dropout=0, 101 n_clusters=2, 102 cluster_encode=True, 103 type_clust="kmeans", 104 type_scaling=("std", "std", "std"), 105 lambda1=0.1, 106 lambda2=0.1, 107 seed=123, 108 backend="cpu", 109 ): 110 super().__init__( 111 n_hidden_features=n_hidden_features, 112 activation_name=activation_name, 113 a=a, 114 nodes_sim=nodes_sim, 115 bias=bias, 116 dropout=dropout, 117 n_clusters=n_clusters, 118 cluster_encode=cluster_encode, 119 type_clust=type_clust, 120 type_scaling=type_scaling, 121 lambda1=lambda1, 122 lambda2=lambda2, 123 seed=seed, 124 backend=backend, 125 ) 126 127 self.type_fit = "regression" 128 self.coef_ = None 129 130 def fit(self, X, y, **kwargs): 131 """Fit Ridge model to training data (X, y). 132 133 Args: 134 135 X: {array-like}, shape = [n_samples, n_features] 136 Training vectors, where n_samples is the number 137 of samples and n_features is the number of features. 138 139 y: array-like, shape = [n_samples] 140 Target values. 
# Ridge2Regressor methods, reformatted from the flattened dump:
# fit (head reconstructed verbatim from its duplicate later in the file),
# predict, and partial_fit.

def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    Solves the two-block penalized least squares problem in closed form
    via a block-matrix (Schur complement) inversion, so the direct link
    is penalized by `lambda1` and the hidden layer by `lambda2`.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    sys_platform = platform.system()

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n_X, p_X = X.shape
    n_Z, p_Z = scaled_Z.shape

    # Number of "direct link" columns (original features plus the
    # cluster encoding, when clustering is used).
    # NOTE: fixed to `cluster_encode` -- the attribute set from the
    # constructor parameter and read by partial_fit; `encode_clusters`
    # is not defined on this estimator.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]        # direct-link block
    Phi_X_ = scaled_Z[:, n_features:p_Z]  # hidden-layer block

    # Blocks of the penalized normal equations [[B, C.T], [C, D]]
    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
        np.repeat(1, n_features)
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(
        x=Phi_X_, backend=self.backend
    ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

    # jpinv (JAX) is only reached on Linux/macOS with a non-CPU backend
    if sys_platform in ("Linux", "Darwin"):
        B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
    else:
        B_inv = pinv(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    # Schur complement of B
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    if sys_platform in ("Linux", "Darwin"):
        S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
    else:
        S_inv = pinv(S_mat)

    Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
    # Assemble the full inverse from the blockwise inversion formula
    inv = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
            y=-np.transpose(Y),
            backend=self.backend,
        ),
        mo.cbind(x=-Y, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
        backend=self.backend,
    )

    self.coef_ = self.beta_  # sklearn compatibility

    return self

def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    if len(X.shape) == 1:
        # A lone sample is stacked on a dummy row of ones so the
        # preprocessing pipeline receives a 2D batch; only the first
        # prediction is returned.
        n_features = X.shape[0]
        new_X = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

        return (
            self.y_mean_
            + mo.safe_sparse_dot(
                a=self.cook_test_set(new_X, **kwargs),
                b=self.beta_,
                backend=self.backend,
            )
        )[0]

    return self.y_mean_ + mo.safe_sparse_dot(
        a=self.cook_test_set(X, **kwargs),
        b=self.beta_,
        backend=self.backend,
    )

def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
    """Incrementally fit the Ridge model using SGD-style updates.

    Uses the update rule:
        w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
    for online learning with individual samples.

    Args:
        X: {array-like}, shape = [n_samples, n_features]
            Training vectors for this batch

        y: array-like, shape = [n_samples]
            Target values for this batch

        learning_rate: float, default=0.01
            Initial learning rate for SGD updates

        decay: float, default=0.001
            Learning rate decay parameter

        **kwargs: additional parameters to be passed to self.cook_training_set

    Returns:
        self: object
    """
    # Input validation
    X = np.asarray(X)
    y = np.asarray(y)

    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y must have the same number of samples")

    # First call: initialize SGD bookkeeping.  getattr guards against
    # `_is_fitted` not existing yet -- TODO confirm Base defines it.
    if not getattr(self, "_is_fitted", False):
        self.initial_learning_rate = learning_rate
        self.decay = decay
        self._step_count = 0
        self._is_fitted = True

    # Process the batch
    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n_samples, n_features_total = scaled_Z.shape
    n_original_features = X.shape[1]

    # Feature split for per-block regularization (direct vs hidden)
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_direct_features = n_original_features + self.n_clusters
        else:
            n_direct_features = n_original_features + 1
    else:
        n_direct_features = n_original_features

    # Lazily initialize coefficients on the first batch (1D: one output)
    if not hasattr(self, "beta_") or self.beta_ is None:
        self.beta_ = np.zeros(n_features_total)

    direct_indices = slice(0, n_direct_features)
    hidden_indices = slice(n_direct_features, n_features_total)

    # One SGD step per sample
    for i in range(n_samples):
        self._step_count += 1

        # Decayed learning rate
        current_lr = self.initial_learning_rate / (
            1 + self.decay * self._step_count
        )

        x_i = scaled_Z[i, :]
        y_i = centered_y[i] if centered_y.ndim == 1 else centered_y[i, 0]

        # Residual of the current prediction
        error = y_i - x_i @ self.beta_

        # Gradient step
        gradient_update = current_lr * x_i * error

        # Per-block ridge shrinkage: lambda1 on direct link, lambda2 on
        # hidden-layer features
        reg_update = np.zeros_like(self.beta_)
        reg_update[direct_indices] = (
            current_lr * self.lambda1 * self.beta_[direct_indices]
        )
        reg_update[hidden_indices] = (
            current_lr * self.lambda2 * self.beta_[hidden_indices]
        )

        self.beta_ += gradient_update - reg_update

    self.coef_ = self.beta_  # sklearn compatibility

    return self
Ridge regression with 2 regularization parameters derived from class Ridge
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
y_mean_: float
average response
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    Solves the two-block penalized least squares problem in closed form
    via a block-matrix (Schur complement) inversion, so the direct link
    is penalized by `lambda1` and the hidden layer by `lambda2`.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    sys_platform = platform.system()

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n_X, p_X = X.shape
    n_Z, p_Z = scaled_Z.shape

    # Number of "direct link" columns (original features plus the
    # cluster encoding, when clustering is used).
    # NOTE: fixed to `cluster_encode` -- the attribute set from the
    # constructor parameter and read by partial_fit; `encode_clusters`
    # is not defined on this estimator.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]        # direct-link block
    Phi_X_ = scaled_Z[:, n_features:p_Z]  # hidden-layer block

    # Blocks of the penalized normal equations [[B, C.T], [C, D]]
    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
        np.repeat(1, n_features)
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(
        x=Phi_X_, backend=self.backend
    ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

    # jpinv (JAX) is only reached on Linux/macOS with a non-CPU backend
    if sys_platform in ("Linux", "Darwin"):
        B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
    else:
        B_inv = pinv(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    # Schur complement of B
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    if sys_platform in ("Linux", "Darwin"):
        S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
    else:
        S_inv = pinv(S_mat)

    Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
    # Assemble the full inverse from the blockwise inversion formula
    inv = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
            y=-np.transpose(Y),
            backend=self.backend,
        ),
        mo.cbind(x=-Y, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend),
        backend=self.backend,
    )

    self.coef_ = self.beta_  # sklearn compatibility

    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
211 def predict(self, X, **kwargs): 212 """Predict test data X. 213 214 Args: 215 216 X: {array-like}, shape = [n_samples, n_features] 217 Training vectors, where n_samples is the number 218 of samples and n_features is the number of features. 219 220 **kwargs: additional parameters to be passed to 221 self.cook_test_set 222 223 Returns: 224 225 model predictions: {array-like} 226 227 """ 228 229 if len(X.shape) == 1: 230 n_features = X.shape[0] 231 new_X = mo.rbind( 232 x=X.reshape(1, n_features), 233 y=np.ones(n_features).reshape(1, n_features), 234 backend=self.backend, 235 ) 236 237 return ( 238 self.y_mean_ 239 + mo.safe_sparse_dot( 240 a=self.cook_test_set(new_X, **kwargs), 241 b=self.beta_, 242 backend=self.backend, 243 ) 244 )[0] 245 246 return self.y_mean_ + mo.safe_sparse_dot( 247 a=self.cook_test_set(X, **kwargs), 248 b=self.beta_, 249 backend=self.backend, 250 )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class Ridge2MultiOutputRegressor(Ridge2, RegressorMixin):
    """Ridge regression with 2 regularization parameters for multiple outputs (zero-loop, JAX-optimized)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            'cpu' or 'gpu' or 'tpu'

    Attributes:

        beta_: {array-like}, shape = [n_features, n_outputs]
            regression coefficients

        coef_: {array-like}
            alias for `beta_`, regression coefficients

        y_mean_: array-like, shape = [n_outputs]
            average response for each output

    """

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        if not JAX_AVAILABLE and backend != "cpu":
            # Fixed install hint: the package is `nnetsauce`, not the
            # template placeholder `yourpackage`.
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install nnetsauce[jax]"
            )

        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.coef_ = None
        # JAX paths are taken only for accelerator backends
        self.use_jax = JAX_AVAILABLE and backend in ("gpu", "tpu")

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y) with multiple outputs.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples] or [n_samples, n_outputs]
                Target values. Can be 1D for single output or 2D for multiple outputs.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """
        sys_platform = platform.system()

        # Ensure y is 2D; a 1D target becomes a single-column matrix.
        # NOTE(review): ambiguous when a batch genuinely has one sample
        # and several outputs -- a (1, K) row is assumed to be a 1D target.
        y = np.atleast_2d(y)
        if y.shape[0] == 1 and y.shape[1] > 1:
            y = y.T

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n_X, p_X = X.shape
        n_Z, p_Z = scaled_Z.shape

        # Number of "direct link" columns.
        # NOTE: fixed to `cluster_encode` -- the constructor parameter,
        # also read by partial_fit; `encode_clusters` is not defined here.
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        X_ = scaled_Z[:, 0:n_features]        # direct-link block
        Phi_X_ = scaled_Z[:, n_features:p_Z]  # hidden-layer block

        if self.use_jax:
            X_ = jnp.array(X_)
            Phi_X_ = jnp.array(Phi_X_)
            centered_y = jnp.array(centered_y)

            # Blocks of the penalized normal equations [[B, C.T], [C, D]]
            B = jnp.dot(X_.T, X_) + self.lambda1 * jnp.eye(n_features)
            C = jnp.dot(Phi_X_.T, X_)
            D = jnp.dot(Phi_X_.T, Phi_X_) + self.lambda2 * jnp.eye(
                Phi_X_.shape[1]
            )

            # Blockwise inversion via the Schur complement of B
            B_inv = jpinv(B)
            W = jnp.dot(C, B_inv)
            S_mat = D - jnp.dot(W, C.T)
            S_inv = jpinv(S_mat)
            Y = jnp.dot(S_inv, W)

            inv_upper = jnp.hstack([B_inv + jnp.dot(W.T, Y), -Y.T])
            inv_lower = jnp.hstack([-Y, S_inv])
            inv = jnp.vstack([inv_upper, inv_lower])

            # Coefficients for all outputs at once (vectorized)
            Z_T_y = jnp.dot(scaled_Z.T, centered_y)
            self.beta_ = jnp.dot(inv, Z_T_y)

            # Convert back to numpy for downstream consumers
            self.beta_ = np.array(self.beta_)
        else:
            # NumPy version of the same closed-form solution
            B = mo.crossprod(
                x=X_, backend=self.backend
            ) + self.lambda1 * np.diag(np.repeat(1, n_features))
            C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
            D = mo.crossprod(
                x=Phi_X_, backend=self.backend
            ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

            # jpinv (JAX) only on Linux/macOS with a non-CPU backend
            if sys_platform in ("Linux", "Darwin"):
                B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
            else:
                B_inv = pinv(B)

            W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
            S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

            if sys_platform in ("Linux", "Darwin"):
                S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
            else:
                S_inv = pinv(S_mat)

            Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
            inv = mo.rbind(
                mo.cbind(
                    x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
                    y=-np.transpose(Y),
                    backend=self.backend,
                ),
                mo.cbind(x=-Y, y=S_inv, backend=self.backend),
                backend=self.backend,
            )

            # Vectorized multi-output computation (no loop)
            Z_T_y = mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend)
            self.beta_ = mo.safe_sparse_dot(
                a=inv, b=Z_T_y, backend=self.backend
            )

        self.coef_ = self.beta_  # sklearn compatibility

        return self

    def predict(self, X, **kwargs):
        """Predict test data X for all outputs.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}, shape = [n_samples, n_outputs]

        """
        if len(X.shape) == 1:
            # A lone sample is stacked on a dummy row of ones so the
            # preprocessing pipeline receives a 2D batch; only the first
            # prediction is returned.
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            cooked = self.cook_test_set(new_X, **kwargs)

            if self.use_jax:
                cooked = jnp.array(cooked)
                predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
                return np.array(predictions[0])
            else:
                return (
                    self.y_mean_
                    + mo.safe_sparse_dot(
                        a=cooked,
                        b=self.beta_,
                        backend=self.backend,
                    )
                )[0]

        cooked = self.cook_test_set(X, **kwargs)

        if self.use_jax:
            cooked = jnp.array(cooked)
            predictions = self.y_mean_ + jnp.dot(cooked, self.beta_)
            return np.array(predictions)
        else:
            return self.y_mean_ + mo.safe_sparse_dot(
                a=cooked,
                b=self.beta_,
                backend=self.backend,
            )

    def partial_fit(self, X, y, learning_rate=0.01, decay=0.001, **kwargs):
        """Incrementally fit the Ridge model using vectorized SGD updates (zero-loop with JAX).

        Uses vectorized update rule for all outputs simultaneously.

        Args:
            X: {array-like}, shape = [n_samples, n_features]
                Training vectors for this batch

            y: array-like, shape = [n_samples] or [n_samples, n_outputs]
                Target values for this batch

            learning_rate: float, default=0.01
                Initial learning rate for SGD updates

            decay: float, default=0.001
                Learning rate decay parameter

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:
            self: object
        """
        # Input validation; mirror fit()'s 1D-target promotion
        X = np.asarray(X)
        y = np.atleast_2d(y)
        if y.shape[0] == 1 and y.shape[1] > 1:
            y = y.T

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        # First call: initialize SGD bookkeeping.  getattr guards against
        # `_is_fitted` not existing yet -- TODO confirm Base defines it.
        if not getattr(self, "_is_fitted", False):
            self.initial_learning_rate = learning_rate
            self.decay = decay
            self._step_count = 0
            self._is_fitted = True

        # Process the batch
        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n_samples, n_features_total = scaled_Z.shape
        n_original_features = X.shape[1]
        n_outputs = centered_y.shape[1] if centered_y.ndim > 1 else 1

        # Feature split for per-block regularization (direct vs hidden)
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_direct_features = n_original_features + self.n_clusters
            else:
                n_direct_features = n_original_features + 1
        else:
            n_direct_features = n_original_features

        # Lazily initialize coefficients on the first batch
        if not hasattr(self, "beta_") or self.beta_ is None:
            self.beta_ = np.zeros((n_features_total, n_outputs))

        # Per-feature ridge penalty: lambda1 on the direct-link block,
        # lambda2 on the hidden-layer block.  Shape: [n_features_total, 1]
        reg_mask = np.concatenate(
            [
                np.full(n_direct_features, self.lambda1),
                np.full(n_features_total - n_direct_features, self.lambda2),
            ]
        )[:, np.newaxis]

        if self.use_jax:
            # JAX implementation: one lax.scan over the batch (zero-loop)
            scaled_Z = jnp.array(scaled_Z)
            centered_y = jnp.array(centered_y)
            self.beta_ = jnp.array(self.beta_)
            reg_mask = jnp.array(reg_mask)

            def update_step(beta, inputs):
                step, x_i, y_i = inputs

                # Decayed learning rate for this step
                lr = self.initial_learning_rate / (1 + self.decay * step)

                # Residual for all outputs: [n_outputs]
                error = y_i - jnp.dot(x_i, beta)

                # Rank-1 gradient step: lr * outer(x_i, error)
                gradient_update = lr * jnp.outer(x_i, error)

                # Per-block shrinkage
                reg_update = lr * (reg_mask * beta)

                return beta + gradient_update - reg_update, None

            # Global step indices for this batch
            steps = jnp.arange(
                self._step_count + 1, self._step_count + n_samples + 1
            )

            self.beta_, _ = jax.lax.scan(
                update_step, self.beta_, (steps, scaled_Z, centered_y)
            )

            self.beta_ = np.array(self.beta_)
            self._step_count += n_samples
        else:
            # NumPy implementation: single loop over samples
            for i in range(n_samples):
                self._step_count += 1

                current_lr = self.initial_learning_rate / (
                    1 + self.decay * self._step_count
                )

                x_i = scaled_Z[i, :]   # [n_features_total]
                y_i = centered_y[i, :]  # [n_outputs]

                # Residual for all outputs
                error = y_i - x_i @ self.beta_

                # Rank-1 gradient update: [n_features_total, n_outputs]
                gradient_update = current_lr * np.outer(x_i, error)

                # Per-block shrinkage
                reg_update = current_lr * (reg_mask * self.beta_)

                self.beta_ += gradient_update - reg_update

        self.coef_ = self.beta_  # sklearn compatibility

        return self
Ridge regression with 2 regularization parameters for multiple outputs (zero-loop, JAX-optimized)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}, shape = [n_features, n_outputs]
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
y_mean_: array-like, shape = [n_outputs]
average response for each output
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y) with multiple outputs.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples] or [n_samples, n_outputs]
            Target values. Can be 1D for single output or 2D for multiple outputs.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    sys_platform = platform.system()

    # Ensure y is 2D; a 1D target becomes a single-column matrix.
    # NOTE(review): ambiguous when a batch genuinely has one sample and
    # several outputs -- a (1, K) row is assumed to be a 1D target.
    y = np.atleast_2d(y)
    if y.shape[0] == 1 and y.shape[1] > 1:
        y = y.T

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n_X, p_X = X.shape
    n_Z, p_Z = scaled_Z.shape

    # Number of "direct link" columns.
    # NOTE: fixed to `cluster_encode` -- the constructor parameter, also
    # read by partial_fit; `encode_clusters` is not defined here.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]        # direct-link block
    Phi_X_ = scaled_Z[:, n_features:p_Z]  # hidden-layer block

    if self.use_jax:
        X_ = jnp.array(X_)
        Phi_X_ = jnp.array(Phi_X_)
        centered_y = jnp.array(centered_y)

        # Blocks of the penalized normal equations [[B, C.T], [C, D]]
        B = jnp.dot(X_.T, X_) + self.lambda1 * jnp.eye(n_features)
        C = jnp.dot(Phi_X_.T, X_)
        D = jnp.dot(Phi_X_.T, Phi_X_) + self.lambda2 * jnp.eye(
            Phi_X_.shape[1]
        )

        # Blockwise inversion via the Schur complement of B
        B_inv = jpinv(B)
        W = jnp.dot(C, B_inv)
        S_mat = D - jnp.dot(W, C.T)
        S_inv = jpinv(S_mat)
        Y = jnp.dot(S_inv, W)

        inv_upper = jnp.hstack([B_inv + jnp.dot(W.T, Y), -Y.T])
        inv_lower = jnp.hstack([-Y, S_inv])
        inv = jnp.vstack([inv_upper, inv_lower])

        # Coefficients for all outputs at once (vectorized)
        Z_T_y = jnp.dot(scaled_Z.T, centered_y)
        self.beta_ = jnp.dot(inv, Z_T_y)

        # Convert back to numpy for downstream consumers
        self.beta_ = np.array(self.beta_)
    else:
        # NumPy version of the same closed-form solution
        B = mo.crossprod(
            x=X_, backend=self.backend
        ) + self.lambda1 * np.diag(np.repeat(1, n_features))
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

        # jpinv (JAX) only on Linux/macOS with a non-CPU backend
        if sys_platform in ("Linux", "Darwin"):
            B_inv = pinv(B) if self.backend == "cpu" else jpinv(B)
        else:
            B_inv = pinv(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        if sys_platform in ("Linux", "Darwin"):
            S_inv = pinv(S_mat) if self.backend == "cpu" else jpinv(S_mat)
        else:
            S_inv = pinv(S_mat)

        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
        inv = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y, backend=self.backend),
                y=-np.transpose(Y),
                backend=self.backend,
            ),
            mo.cbind(x=-Y, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        # Vectorized multi-output computation (no loop)
        Z_T_y = mo.crossprod(x=scaled_Z, y=centered_y, backend=self.backend)
        self.beta_ = mo.safe_sparse_dot(
            a=inv, b=Z_T_y, backend=self.backend
        )

    self.coef_ = self.beta_  # sklearn compatibility

    return self
Fit Ridge model to training data (X, y) with multiple outputs.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples] or [n_samples, n_outputs]
Target values. Can be 1D for single output or 2D for multiple outputs.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
258 def predict(self, X, **kwargs): 259 """Predict test data X for all outputs. 260 261 Args: 262 263 X: {array-like}, shape = [n_samples, n_features] 264 Training vectors, where n_samples is the number 265 of samples and n_features is the number of features. 266 267 **kwargs: additional parameters to be passed to 268 self.cook_test_set 269 270 Returns: 271 272 model predictions: {array-like}, shape = [n_samples, n_outputs] 273 274 """ 275 276 if len(X.shape) == 1: 277 n_features = X.shape[0] 278 new_X = mo.rbind( 279 x=X.reshape(1, n_features), 280 y=np.ones(n_features).reshape(1, n_features), 281 backend=self.backend, 282 ) 283 284 cooked = self.cook_test_set(new_X, **kwargs) 285 286 if self.use_jax: 287 cooked = jnp.array(cooked) 288 predictions = self.y_mean_ + jnp.dot(cooked, self.beta_) 289 return np.array(predictions[0]) 290 else: 291 return ( 292 self.y_mean_ 293 + mo.safe_sparse_dot( 294 a=cooked, 295 b=self.beta_, 296 backend=self.backend, 297 ) 298 )[0] 299 300 cooked = self.cook_test_set(X, **kwargs) 301 302 if self.use_jax: 303 cooked = jnp.array(cooked) 304 predictions = self.y_mean_ + jnp.dot(cooked, self.beta_) 305 return np.array(predictions) 306 else: 307 return self.y_mean_ + mo.safe_sparse_dot( 308 a=cooked, 309 b=self.beta_, 310 backend=self.backend, 311 )
Predict test data X for all outputs.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}, shape = [n_samples, n_outputs]
18class Ridge2Classifier(Ridge2, ClassifierMixin): 19 """Multinomial logit classification with 2 regularization parameters 20 21 Parameters: 22 23 n_hidden_features: int 24 number of nodes in the hidden layer 25 26 activation_name: str 27 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 28 29 a: float 30 hyperparameter for 'prelu' or 'elu' activation function 31 32 nodes_sim: str 33 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 34 'uniform' 35 36 bias: boolean 37 indicates if the hidden layer contains a bias term (True) or not 38 (False) 39 40 dropout: float 41 regularization parameter; (random) percentage of nodes dropped out 42 of the training 43 44 direct_link: boolean 45 indicates if the original predictors are included (True) in model's 46 fitting or not (False) 47 48 n_clusters: int 49 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: 50 no clustering) 51 52 cluster_encode: bool 53 defines how the variable containing clusters is treated (default is one-hot) 54 if `False`, then labels are used, without one-hot encoding 55 56 type_clust: str 57 type of clustering method: currently k-means ('kmeans') or Gaussian 58 Mixture Model ('gmm') 59 60 type_scaling: a tuple of 3 strings 61 scaling methods for inputs, hidden layer, and clustering respectively 62 (and when relevant). 
63 Currently available: standardization ('std') or MinMax scaling ('minmax') 64 65 lambda1: float 66 regularization parameter on direct link 67 68 lambda2: float 69 regularization parameter on hidden layer 70 71 solver: str 72 optimization function "L-BFGS-B", "Newton-CG", 73 "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq", 74 "trust-ncg-lstsq" (see scipy.optimize.minimize) 75 When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq", 76 the initial value for the optimization is set to the least squares solution 77 78 seed: int 79 reproducibility seed for nodes_sim=='uniform' 80 81 backend: str 82 "cpu" or "gpu" or "tpu" 83 84 Attributes: 85 86 beta_: {array-like} 87 regression coefficients 88 89 classes_: {array-like} 90 unique classes in the target variable 91 92 minloglik_: float 93 minimum value of the negative log-likelihood 94 95 Examples: 96 97 See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py) 98 99 ```python 100 import nnetsauce as ns 101 import numpy as np 102 from sklearn.datasets import load_breast_cancer 103 from sklearn.model_selection import train_test_split 104 from time import time 105 106 107 breast_cancer = load_breast_cancer() 108 X = breast_cancer.data 109 y = breast_cancer.target 110 111 # split data into training test and test set 112 np.random.seed(123) 113 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 114 115 # create the model with nnetsauce 116 fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04, 117 lambda2 = 3.17392781e+02, 118 n_hidden_features=95, 119 n_clusters=2, 120 dropout = 3.62817383e-01, 121 type_clust = "gmm") 122 123 # fit the model on training set 124 start = time() 125 fit_obj.fit(X_train, y_train) 126 print(f"Elapsed {time() - start}") 127 128 # get the accuracy on test set 129 start = time() 130 print(fit_obj.score(X_test, y_test)) 131 
print(f"Elapsed {time() - start}") 132 133 # get area under the curve on test set (auc) 134 print(fit_obj.score(X_test, y_test, scoring="roc_auc")) 135 ``` 136 137 138 """ 139 140 _estimator_type = "classifier" 141 142 # construct the object ----- 143 144 def __init__( 145 self, 146 n_hidden_features=5, 147 activation_name="relu", 148 a=0.01, 149 nodes_sim="sobol", 150 bias=True, 151 dropout=0, 152 direct_link=True, 153 n_clusters=2, 154 cluster_encode=True, 155 type_clust="kmeans", 156 type_scaling=("std", "std", "std"), 157 lambda1=0.1, 158 lambda2=0.1, 159 solver="L-BFGS-B", 160 seed=123, 161 backend="cpu", 162 ): 163 super().__init__( 164 n_hidden_features=n_hidden_features, 165 activation_name=activation_name, 166 a=a, 167 nodes_sim=nodes_sim, 168 bias=bias, 169 dropout=dropout, 170 direct_link=direct_link, 171 n_clusters=n_clusters, 172 cluster_encode=cluster_encode, 173 type_clust=type_clust, 174 type_scaling=type_scaling, 175 lambda1=lambda1, 176 lambda2=lambda2, 177 seed=seed, 178 backend=backend, 179 ) 180 181 self.type_fit = "classification" 182 self.solver = solver 183 self.beta_ = None 184 self.classes_ = None 185 self.minloglik_ = None 186 self.coef_ = None 187 188 def loglik(self, X, Y, **kwargs): 189 """Log-likelihood for training data (X, Y). 190 191 Args: 192 193 X: {array-like}, shape = [n_samples, n_features] 194 Training vectors, where n_samples is the number 195 of samples and n_features is the number of features. 196 197 Y: array-like, shape = [n_samples] 198 One-hot encode target values. 
199 200 **kwargs: additional parameters to be passed to 201 self.cook_training_set or self.obj.fit 202 203 Returns: 204 205 """ 206 207 def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs): 208 # nobs, n_classes 209 n, K = Y.shape 210 211 # total number of covariates 212 p = X.shape[1] 213 214 # initial number of covariates 215 init_p = p - self.n_hidden_features 216 217 max_double = 709.0 218 XB[XB > max_double] = max_double 219 exp_XB = np.exp(XB) 220 probs = exp_XB / exp_XB.sum(axis=1)[:, None] 221 222 # gradient ----- 223 # (Y - p) -> (n, K) 224 # X -> (n, p) 225 # (K, n) %*% (n, p) -> (K, p) 226 if hessian is False: 227 grad = ( 228 -mo.safe_sparse_dot( 229 a=(Y - probs).T, b=X, backend=self.backend 230 ) 231 / n 232 ) 233 grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None] 234 grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None] 235 236 return grad.flatten() 237 238 # hessian ----- 239 if hessian is True: 240 Kp = K * p 241 hess = np.zeros((Kp, Kp), float) 242 for k1 in range(K): 243 x_index = range(k1 * p, (k1 + 1) * p) 244 for k2 in range(k1, K): 245 y_index = range(k2 * p, (k2 + 1) * p) 246 H_sub = ( 247 -mo.safe_sparse_dot( 248 a=X.T, 249 b=(probs[:, k1] * probs[:, k2])[:, None] * X, 250 backend=self.backend, 251 ) 252 / n 253 ) # do not store 254 hess[np.ix_(x_index, y_index)] = hess[ 255 np.ix_(y_index, x_index) 256 ] = H_sub 257 258 return hess + (self.lambda1 + self.lambda2) * np.identity(Kp) 259 260 # total number of covariates 261 p = X.shape[1] 262 263 # initial number of covariates 264 init_p = p - self.n_hidden_features 265 266 # log-likelihood (1st return) 267 def loglik_func(x): 268 # (p, K) 269 B = x.reshape(Y.shape[1], p).T 270 271 # (n, K) 272 XB = mo.safe_sparse_dot(X, B, backend=self.backend) 273 274 res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean() 275 276 res += ( 277 0.5 278 * self.lambda1 279 * mo.squared_norm(B[0:init_p, :], backend=self.backend) 280 ) 281 res += ( 282 0.5 283 * self.lambda2 284 * 
mo.squared_norm(B[init_p:p, :], backend=self.backend) 285 ) 286 287 return res 288 289 # gradient of log-likelihood 290 def grad_func(x): 291 # (p, K) 292 B = x.reshape(Y.shape[1], p).T 293 294 return loglik_grad_hess( 295 Y=Y, 296 X=X, 297 B=B, 298 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 299 hessian=False, 300 **kwargs 301 ) 302 303 # hessian of log-likelihood 304 def hessian_func(x): 305 # (p, K) 306 B = x.reshape(Y.shape[1], p).T 307 308 return loglik_grad_hess( 309 Y=Y, 310 X=X, 311 B=B, 312 XB=mo.safe_sparse_dot(X, B, backend=self.backend), 313 hessian=True, 314 **kwargs 315 ) 316 317 return loglik_func, grad_func, hessian_func 318 319 # newton-cg 320 # L-BFGS-B 321 def fit(self, X, y, **kwargs): 322 """Fit Ridge model to training data (X, y). 323 324 for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) 325 for K classes and p covariates. 326 327 Args: 328 329 X: {array-like}, shape = [n_samples, n_features] 330 Training vectors, where n_samples is the number 331 of samples and n_features is the number of features. 332 333 y: array-like, shape = [n_samples] 334 Target values. 
335 336 **kwargs: additional parameters to be passed to 337 self.cook_training_set or self.obj.fit 338 339 Returns: 340 341 self: object 342 343 """ 344 345 assert mx.is_factor(y), "y must contain only integers" 346 347 output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) 348 349 self.n_classes = len(np.unique(y)) 350 self.classes_ = np.unique(y) # for compatibility with sklearn 351 self.n_classes_ = len(self.classes_) # for compatibility with sklearn 352 353 Y = mo.one_hot_encode2(output_y, self.n_classes) 354 355 # optimize for beta, minimize self.loglik (maximize loglik) ----- 356 loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y) 357 358 if self.solver == "L-BFGS-B": 359 opt = minimize( 360 fun=loglik_func, 361 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 362 jac=grad_func, 363 method=self.solver, 364 ) 365 self.beta_ = opt.x 366 self.minloglik_ = opt.fun 367 368 if self.solver in ("Newton-CG", "trust-ncg"): 369 opt = minimize( 370 fun=loglik_func, 371 x0=np.zeros(scaled_Z.shape[1] * self.n_classes), 372 jac=grad_func, 373 hess=hessian_func, 374 method=self.solver, 375 ) 376 self.beta_ = opt.x 377 self.minloglik_ = opt.fun 378 379 if self.solver == "L-BFGS-B-lstsq": 380 opt = minimize( 381 fun=loglik_func, 382 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 383 order="F" 384 ), 385 jac=grad_func, 386 method="L-BFGS-B", 387 ) 388 self.beta_ = opt.x 389 self.minloglik_ = opt.fun 390 391 if self.solver in "Newton-CG-lstsq": 392 opt = minimize( 393 fun=loglik_func, 394 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 395 order="F" 396 ), 397 jac=grad_func, 398 hess=hessian_func, 399 method="Newton-CG", 400 ) 401 self.beta_ = opt.x 402 self.minloglik_ = opt.fun 403 404 if self.solver in "trust-ncg-lstsq": 405 opt = minimize( 406 fun=loglik_func, 407 x0=np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten( 408 order="F" 409 ), 410 jac=grad_func, 411 hess=hessian_func, 412 method="trust-ncg", 413 ) 414 self.beta_ = opt.x 
415 self.minloglik_ = opt.fun 416 417 self.coef_ = self.beta_ 418 419 self.classes_ = np.unique(y) 420 421 return self 422 423 def predict(self, X, **kwargs): 424 """Predict test data X. 425 426 Args: 427 428 X: {array-like}, shape = [n_samples, n_features] 429 Training vectors, where n_samples is the number 430 of samples and n_features is the number of features. 431 432 **kwargs: additional parameters to be passed to 433 self.cook_test_set 434 435 Returns: 436 437 model predictions: {array-like} 438 """ 439 440 return np.argmax(self.predict_proba(X, **kwargs), axis=1) 441 442 def predict_proba(self, X, **kwargs): 443 """Predict probabilities for test data X. 444 445 Args: 446 447 X: {array-like}, shape = [n_samples, n_features] 448 Training vectors, where n_samples is the number 449 of samples and n_features is the number of features. 450 451 **kwargs: additional parameters to be passed to 452 self.cook_test_set 453 454 Returns: 455 456 probability estimates for test data: {array-like} 457 458 """ 459 if len(X.shape) == 1: 460 n_features = X.shape[0] 461 new_X = mo.rbind( 462 X.reshape(1, n_features), 463 np.ones(n_features).reshape(1, n_features), 464 ) 465 466 Z = self.cook_test_set(new_X, **kwargs) 467 468 else: 469 Z = self.cook_test_set(X, **kwargs) 470 471 ZB = mo.safe_sparse_dot( 472 a=Z, 473 b=self.beta_.reshape( 474 self.n_classes, 475 X.shape[1] + self.n_hidden_features + self.n_clusters, 476 ).T, 477 backend=self.backend, 478 ) 479 480 exp_ZB = np.exp(ZB) 481 482 return exp_ZB / exp_ZB.sum(axis=1)[:, None] 483 484 @property 485 def _estimator_type(self): 486 return "classifier"
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization method: "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# split data into training test and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
lambda2 = 3.17392781e+02,
n_hidden_features=95,
n_clusters=2,
dropout = 3.62817383e-01,
type_clust = "gmm")
# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    beta denotes the regression coefficients, ordered as
    (beta_11, ..., beta_1p, ..., beta_K1, ..., beta_Kp)
    for K classes and p covariates.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    self.n_classes = len(np.unique(y))
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # optimize for beta: minimize the penalized negative log-likelihood
    loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

    n_coeffs = scaled_Z.shape[1] * self.n_classes

    # starting point for *-lstsq solvers: least squares solution
    def lstsq_x0():
        return np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F")

    # BUG FIX: the original tested `self.solver in "Newton-CG-lstsq"`
    # (substring membership on a string), so e.g. solver="Newton-CG" also
    # matched the "-lstsq" branch and the optimization ran twice, the second
    # run silently overwriting the first. Exact equality in an elif chain
    # ensures exactly one branch runs.
    if self.solver == "L-BFGS-B":
        opt = minimize(
            fun=loglik_func,
            x0=np.zeros(n_coeffs),
            jac=grad_func,
            method=self.solver,
        )
    elif self.solver in ("Newton-CG", "trust-ncg"):
        opt = minimize(
            fun=loglik_func,
            x0=np.zeros(n_coeffs),
            jac=grad_func,
            hess=hessian_func,
            method=self.solver,
        )
    elif self.solver == "L-BFGS-B-lstsq":
        opt = minimize(
            fun=loglik_func,
            x0=lstsq_x0(),
            jac=grad_func,
            method="L-BFGS-B",
        )
    elif self.solver == "Newton-CG-lstsq":
        opt = minimize(
            fun=loglik_func,
            x0=lstsq_x0(),
            jac=grad_func,
            hess=hessian_func,
            method="Newton-CG",
        )
    elif self.solver == "trust-ncg-lstsq":
        opt = minimize(
            fun=loglik_func,
            x0=lstsq_x0(),
            jac=grad_func,
            hess=hessian_func,
            method="trust-ncg",
        )
    else:
        # previously an unknown solver silently left beta_ as None
        raise ValueError(f"Unknown solver: {self.solver}")

    self.beta_ = opt.x
    self.minloglik_ = opt.fun
    self.coef_ = self.beta_

    self.classes_ = np.unique(y)

    return self
Fit Ridge model to training data (X, y).
beta denotes the regression coefficients, ordered as (beta_11, ..., beta_1p, ..., beta_K1, ..., beta_Kp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict class labels for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to classify.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}
    """

    probs = self.predict_proba(X, **kwargs)
    # label = index of the most probable class per sample
    return np.argmax(probs, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to classify.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """
    if len(X.shape) == 1:
        # single 1-D sample: pad with a dummy row of ones so the
        # preprocessing pipeline receives a 2-D array.
        # NOTE(review): the dummy row is not stripped from the output, and
        # X.shape[1] below fails for 1-D input — confirm intended usage.
        n_feats = X.shape[0]
        padded = mo.rbind(
            X.reshape(1, n_feats),
            np.ones(n_feats).reshape(1, n_feats),
        )
        Z = self.cook_test_set(padded, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    # unflatten coefficients into a (n_classes, total_features) matrix
    coef_matrix = self.beta_.reshape(
        self.n_classes,
        X.shape[1] + self.n_hidden_features + self.n_clusters,
    )

    scores = mo.safe_sparse_dot(a=Z, b=coef_matrix.T, backend=self.backend)

    # row-wise softmax of the class scores
    exp_scores = np.exp(scores)
    return exp_scores / exp_scores.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
    """Multitask Ridge classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
            no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        coef_: {array-like}
            alias for `beta_`, regression coefficients

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                           dropout=4.31054687e-01,
                                           n_clusters=int(1.71484375e+00),
                                           lambda1=1.24023438e+01, lambda2=7.30263672e+03)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----
    _estimator_type = "classifier"

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.coef_ = None  # sklearn-style alias for beta_, set by fit()

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        Solves the multitask (one-hot response) ridge system in closed form
        via a 2x2 block-matrix inverse (Schur complement).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        sys_platform = platform.system()

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n_X, p_X = X.shape
        n_Z, p_Z = scaled_Z.shape

        self.n_classes = len(np.unique(y))

        # multitask response
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # NOTE(review): fit reads `self.encode_clusters` while partial_fit
        # reads `self.cluster_encode` — confirm both attributes are set by
        # the base class
        if self.n_clusters > 0:
            if self.encode_clusters:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        # direct-link columns vs. hidden-layer columns
        X_ = scaled_Z[:, 0:n_features]
        Phi_X_ = scaled_Z[:, n_features:p_Z]

        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
            np.repeat(1, X_.shape[1])
        )
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

        # jax pseudo-inverse only on Linux/macOS with a non-cpu backend;
        # otherwise fall back to the scipy/numpy pinv
        use_jax = self.backend != "cpu" and sys_platform in ("Linux", "Darwin")
        B_inv = jpinv(B) if use_jax else pinv(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        S_inv = jpinv(S_mat) if use_jax else pinv(S_mat)

        # block inverse of [[B, C^T], [C, D]] from the Schur complement
        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
        inv = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
                y=-np.transpose(Y2),
                backend=self.backend,
            ),
            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        self.beta_ = mo.safe_sparse_dot(
            a=inv,
            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
            backend=self.backend,
        )
        self.coef_ = self.beta_  # sklearn compatibility
        self.classes_ = np.unique(y)
        self._is_fitted = True
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to classify.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            model predictions: {array-like}

        """

        # label = index of the most probable class
        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to classify.

            **kwargs: additional parameters to be passed to
                self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        if len(X.shape) == 1:
            # single 1-D sample: pad with a dummy row so preprocessing
            # receives a 2-D array
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)

        # row-wise softmax
        exp_ZB = np.exp(ZB)

        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Samples.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float (None for an unrecognized `scoring` string)
        """

        if scoring is None:
            scoring = "accuracy"

        if scoring == "accuracy":
            return skm2.accuracy_score(y, self.predict(X))

        if scoring == "f1":
            return skm2.f1_score(y, self.predict(X))

        if scoring == "precision":
            return skm2.precision_score(y, self.predict(X))

        if scoring == "recall":
            return skm2.recall_score(y, self.predict(X))

        if scoring == "roc_auc":
            return skm2.roc_auc_score(y, self.predict(X))

        if scoring == "log_loss":
            return skm2.log_loss(y, self.predict_proba(X))

        if scoring == "balanced_accuracy":
            return skm2.balanced_accuracy_score(y, self.predict(X))

        if scoring == "average_precision":
            return skm2.average_precision_score(y, self.predict(X))

        if scoring == "neg_brier_score":
            return -skm2.brier_score_loss(y, self.predict_proba(X))

        if scoring == "neg_log_loss":
            return -skm2.log_loss(y, self.predict_proba(X))

    @property
    def _estimator_type(self):
        return "classifier"

    def partial_fit(
        self, X, y, classes=None, learning_rate=0.01, decay=0.001, **kwargs
    ):
        """Incrementally fit the Ridge model using SGD-style updates.

        Uses the update rule: w_{n+1} = w_n + γ_n * x_n * [y_n - x_n^T * w_n] - γ_n * λ * w_n
        for online learning with individual samples.

        Args:
            X: {array-like}, shape = [n_samples, n_features]
                Training vectors for this batch

            y: array-like, shape = [n_samples]
                Target values for this batch

            classes: array-like, shape = [n_classes], optional
                List of all possible target classes. Must be provided on first call
                to partial_fit if not already fitted.

            learning_rate: float, default=0.01
                Initial learning rate for SGD updates

            decay: float, default=0.001
                Learning rate decay parameter

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:
            self: object

        Raises:
            ValueError: if X and y lengths differ, or new classes appear
                after the first call.
        """
        # Input validation
        X = np.asarray(X)
        y = np.asarray(y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples")

        assert mx.is_factor(y), "y must contain only integers"

        # BUG FIX: the original read `self._is_fitted` unconditionally, which
        # raises AttributeError when partial_fit is the FIRST call on a fresh
        # instance (the attribute is only set by fit or by this branch).
        if not getattr(self, "_is_fitted", False):
            # First call: fix the class set and the learning schedule
            if classes is not None:
                self.classes_ = np.array(classes)
            else:
                self.classes_ = np.unique(y)
            self.n_classes_ = len(self.classes_)
            self.n_classes = len(self.classes_)

            self.initial_learning_rate = learning_rate
            self.decay = decay
            self._step_count = 0
            self._is_fitted = True

        else:
            # Later calls: reject labels outside the known class set
            new_classes = np.setdiff1d(y, self.classes_)
            if len(new_classes) > 0:
                raise ValueError(
                    f"New classes {new_classes} encountered. "
                    "partial_fit cannot handle new classes after first call."
                )

        # Process the batch
        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        n_samples, n_features_total = scaled_Z.shape
        n_original_features = X.shape[1]

        # Create one-hot encoded targets
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # Determine feature dimensions for regularization
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_direct_features = n_original_features + self.n_clusters
            else:
                n_direct_features = n_original_features + 1
        else:
            n_direct_features = n_original_features

        # Initialize beta_ if first time
        if getattr(self, "beta_", None) is None:
            self.beta_ = np.zeros((n_features_total, self.n_classes))

        # Precompute slices for the two regularization blocks
        direct_indices = slice(0, n_direct_features)
        hidden_indices = slice(n_direct_features, n_features_total)

        # Process each sample with SGD
        for i in range(n_samples):
            self._step_count += 1

            # Learning rate with inverse-time decay
            current_lr = self.initial_learning_rate / (
                1 + self.decay * self._step_count
            )

            x_i = scaled_Z[i, :]  # feature vector
            y_i = Y[i, :]  # one-hot target vector

            # Prediction and residual
            prediction = x_i @ self.beta_
            error = y_i - prediction

            # Gradient step on the squared error
            gradient_update = current_lr * np.outer(x_i, error)

            # Ridge shrinkage, with lambda1 on the direct-link block and
            # lambda2 on the hidden-layer block
            reg_update = np.zeros_like(self.beta_)
            reg_update[direct_indices, :] = (
                current_lr * self.lambda1 * self.beta_[direct_indices, :]
            )
            reg_update[hidden_indices, :] = (
                current_lr * self.lambda2 * self.beta_[hidden_indices, :]
            )

            # Combined update: beta = beta + gradient_update - reg_update
            self.beta_ += gradient_update - reg_update

        self.coef_ = self.beta_  # sklearn compatibility

        return self
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
coef_: {array-like}
alias for `beta_`, regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
dropout=4.31054687e-01,
n_clusters=int(1.71484375e+00),
lambda1=1.24023438e+01, lambda2=7.30263672e+03)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    Solves the multitask (one-hot response) ridge system in closed form
    via a 2x2 block-matrix inverse (Schur complement).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    sys_platform = platform.system()

    assert mx.is_factor(y), "y must contain only integers"

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    n_X, p_X = X.shape
    n_Z, p_Z = scaled_Z.shape

    self.n_classes = len(np.unique(y))

    # multitask response
    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # IDIOM: `self.encode_clusters == True` replaced by truthiness test
    if self.n_clusters > 0:
        if self.encode_clusters:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    # direct-link columns vs. hidden-layer columns
    X_ = scaled_Z[:, 0:n_features]
    Phi_X_ = scaled_Z[:, n_features:p_Z]

    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.diag(
        np.repeat(1, X_.shape[1])
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(
        x=Phi_X_, backend=self.backend
    ) + self.lambda2 * np.diag(np.repeat(1, Phi_X_.shape[1]))

    # jax pseudo-inverse only on Linux/macOS with a non-cpu backend; the
    # original duplicated `pinv` across both platform branches — deduped here
    use_jax = self.backend != "cpu" and sys_platform in ("Linux", "Darwin")
    B_inv = jpinv(B) if use_jax else pinv(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    S_inv = jpinv(S_mat) if use_jax else pinv(S_mat)

    # block inverse of [[B, C^T], [C, D]] from the Schur complement
    Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
    inv = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
            y=-np.transpose(Y2),
            backend=self.backend,
        ),
        mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
        backend=self.backend,
    )
    self.coef_ = self.beta_  # sklearn compatibility
    self.classes_ = np.unique(y)
    self._is_fitted = True
    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict class labels for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to classify.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}

    """

    # most probable class per row of the probability matrix
    return self.predict_proba(X, **kwargs).argmax(axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Samples to classify.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """

    if len(X.shape) == 1:
        # single 1-D sample: pad with a dummy row of ones so the
        # preprocessing pipeline receives a 2-D array.
        # NOTE(review): the dummy row's output is not stripped — confirm
        # callers only use the first row in this case.
        n_feats = X.shape[0]
        padded = mo.rbind(
            x=X.reshape(1, n_feats),
            y=np.ones(n_feats).reshape(1, n_feats),
            backend=self.backend,
        )
        Z = self.cook_test_set(padded, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    scores = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)

    # row-wise softmax of the class scores
    exp_scores = np.exp(scores)
    return exp_scores / exp_scores.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy)

    Returns:

        score: float

    Raises:

        ValueError: if `scoring` is not a supported metric name
            (previously an unknown name silently returned None).
    """
    if scoring is None:
        scoring = "accuracy"

    # Metrics evaluated on hard label predictions.
    label_metrics = {
        "accuracy": skm2.accuracy_score,
        "f1": skm2.f1_score,
        "precision": skm2.precision_score,
        "recall": skm2.recall_score,
        "roc_auc": skm2.roc_auc_score,
        "balanced_accuracy": skm2.balanced_accuracy_score,
        "average_precision": skm2.average_precision_score,
    }
    if scoring in label_metrics:
        return label_metrics[scoring](y, self.predict(X))

    # Metrics evaluated on predicted probabilities.
    if scoring == "log_loss":
        return skm2.log_loss(y, self.predict_proba(X))
    if scoring == "neg_brier_score":
        # NOTE(review): brier_score_loss expects 1-D positive-class
        # probabilities; self.predict_proba returns a 2-D matrix here,
        # as in the original code — confirm intended usage.
        return -skm2.brier_score_loss(y, self.predict_proba(X))
    if scoring == "neg_log_loss":
        return -skm2.log_loss(y, self.predict_proba(X))

    raise ValueError(f"Unknown scoring option: '{scoring}'")
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class Ridge2Forecaster:
    """Vectorized Ridge2 RVFL for multivariate time series forecasting.

    Random Vector Functional Link network with two ridge penalties:
    ``lambda_1`` on the standardized lagged inputs and ``lambda_2`` on the
    hidden-layer features.  Fitting is closed-form (Schur-complement block
    ridge solve); forecasting is recursive one-step-ahead.

    Parameters
    ----------
    lags : int, optional
        Number of lags to use for feature engineering, by default 1
    nb_hidden : int, optional
        Number of hidden units, by default 5
    activ : str, optional
        Activation function ('relu', 'sigmoid', 'tanh' or 'linear'),
        by default 'relu'
    lambda_1 : float, optional
        Ridge regularization parameter for input features, by default 0.1
    lambda_2 : float, optional
        Ridge regularization parameter for hidden units, by default 0.1
    nodes_sim : str, optional
        Type of quasi-random sequence for weight initialization, by default
        'sobol' (any other value falls back to uniform JAX random weights)
    seed : int, optional
        Random seed for reproducibility, by default 42
    """

    def __init__(
        self,
        lags=1,
        nb_hidden=5,
        activ="relu",
        lambda_1=0.1,
        lambda_2=0.1,
        nodes_sim="sobol",
        seed=42,
    ):
        # Fail fast when the optional JAX dependency is absent.
        if not JAX_AVAILABLE:
            raise RuntimeError(
                "JAX is required for this feature. Install with: pip install yourpackage[jax]"
            )

        self.lags = lags
        self.nb_hidden = nb_hidden
        self.lambda_1 = lambda_1
        self.lambda_2 = lambda_2
        self.nodes_sim = nodes_sim
        self.seed = seed
        self.coef_ = None  # set by _solve_ridge2 (sklearn-style attribute)

        # Activation functions
        # NOTE: an unknown `activ` raises KeyError on the lookup below.
        activations = {
            "relu": lambda x: jnp.maximum(0, x),
            "sigmoid": lambda x: 1 / (1 + jnp.exp(-x)),
            "tanh": jnp.tanh,
            "linear": lambda x: x,
        }
        self.activation = jax.jit(activations[activ])

    def _create_lags(self, y):
        """Create lagged feature matrix (vectorized).

        Expects 2-D ``y`` of shape (n, p); returns ``X`` of shape
        (n - lags, lags * p) and aligned targets ``Y = y[lags:]``.
        """
        n, p = y.shape
        # Column block i holds lag (i + 1) of every series, so row t of X
        # is [y[t + lags - 1], ..., y[t]] flattened across series.
        X = jnp.concatenate(
            [y[self.lags - i - 1: n - i - 1] for i in range(self.lags)], axis=1
        )
        Y = y[self.lags:]
        return X, Y

    def _init_weights(self, n_features):
        """Initialize hidden layer weights using quasi-random sequences."""
        total_dim = n_features * self.nb_hidden

        if self.nodes_sim == "sobol":
            # One Sobol point of dimension n_features * nb_hidden, reshaped
            # into the weight matrix, then mapped from [0, 1) to [-1, 1).
            sampler = qmc.Sobol(d=total_dim, scramble=False, seed=self.seed)
            W = sampler.random(1).reshape(n_features, self.nb_hidden)
            W = 2 * W - 1
        else:
            # Fallback: uniform random weights in [-1, 1) via JAX PRNG.
            key = jax.random.PRNGKey(self.seed)
            W = jax.random.uniform(
                key, (n_features, self.nb_hidden), minval=-1, maxval=1
            )

        return jnp.array(W)

    if JAX_AVAILABLE:

        @partial(jax.jit, static_argnums=(0,))
        def _compute_hidden(self, X, W):
            """Compute hidden layer features (vectorized)."""
            return self.activation(X @ W)

        @partial(jax.jit, static_argnums=(0,))
        def _solve_ridge2(self, X, H, Y):
            """Solve ridge regression with dual regularization.

            Centers Y and standardizes X and H, then solves the 2-block
            ridge system via the Schur complement of the hidden block.
            """
            n, p_x = X.shape
            _, p_h = H.shape

            Y_mean = jnp.mean(Y, axis=0)
            Y_c = Y - Y_mean

            X_mean = jnp.mean(X, axis=0)
            X_std = jnp.std(X, axis=0)
            # Guard against constant columns (zero std) before scaling.
            X_std = jnp.where(X_std == 0, 1.0, X_std)
            X_s = (X - X_mean) / X_std

            H_mean = jnp.mean(H, axis=0)
            H_std = jnp.std(H, axis=0)
            H_std = jnp.where(H_std == 0, 1.0, H_std)
            H_s = (H - H_mean) / H_std

            # Regularized Gram blocks of the stacked design [X_s | H_s].
            XX = X_s.T @ X_s + self.lambda_1 * jnp.eye(p_x)
            XH = X_s.T @ H_s
            HH = H_s.T @ H_s + self.lambda_2 * jnp.eye(p_h)

            XX_inv = jnp.linalg.inv(XX)
            # Schur complement of the XX block.
            S = HH - XH.T @ XX_inv @ XH
            S_inv = jnp.linalg.inv(S)

            XY = X_s.T @ Y_c
            HY = H_s.T @ Y_c

            beta = XX_inv @ (XY - XH @ S_inv @ (HY - XH.T @ XX_inv @ XY))
            gamma = S_inv @ (HY - XH.T @ beta)
            # NOTE(review): this runs inside jax.jit with `self` static, so
            # the assignment executes at trace time (possibly with tracer
            # values) rather than on every call — confirm coef_ is usable.
            self.coef_ = jnp.concatenate([beta, gamma], axis=1)

            return beta, gamma, Y_mean, X_mean, X_std, H_mean, H_std

    def fit(self, y):
        """Fit the Ridge2 model.

        Parameters
        ----------
        y : array-like of shape (n_samples,) or (n_samples, n_series)
            Target values.
        """
        y = jnp.array(y)
        # Promote univariate input to a single-column matrix so the
        # multivariate code path handles both cases.
        if y.ndim == 1:
            y = y[:, None]

        X, Y = self._create_lags(y)
        self.n_series = Y.shape[1]

        self.W = self._init_weights(X.shape[1])
        H = self._compute_hidden(X, self.W)

        (
            self.beta,
            self.gamma,
            self.Y_mean,
            self.X_mean,
            self.X_std,
            self.H_mean,
            self.H_std,
        ) = self._solve_ridge2(X, H, Y)

        # Compute residuals for prediction intervals
        X_s = (X - self.X_mean) / self.X_std
        H_s = (H - self.H_mean) / self.H_std
        fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
        self.residuals = np.array(Y - fitted)

        # Keep the last `lags` observations to seed recursive forecasting.
        self.last_obs = y[-self.lags:]
        return self

    if JAX_AVAILABLE:

        @partial(jax.jit, static_argnums=(0,))
        def _predict_step(self, x_new):
            """Single prediction step (JIT-compiled).

            Parameters
            ----------
            x_new : array-like of shape (n_features,)
                New input data (flattened lag window).

            Returns
            -------
            y_next : array-like
                Next-step prediction.
            """
            x_s = (x_new - self.X_mean) / self.X_std
            h = self.activation(x_s @ self.W)
            h_s = (h - self.H_mean) / self.H_std
            return x_s @ self.beta + h_s @ self.gamma + self.Y_mean

    def _forecast(self, h=5):
        """Generate h-step ahead recursive forecasts.

        Each prediction is appended to the lag window, which slides
        forward one step per horizon.

        Parameters
        ----------
        h : int, optional
            Number of steps to forecast, by default 5

        Returns
        -------
        forecasts : array-like of shape (h, n_series)
            Forecasted values.
        """
        forecasts = []
        current = self.last_obs.copy()

        for _ in range(h):
            x_new = current.flatten()[None, :]
            y_next = self._predict_step(x_new)[0]
            forecasts.append(y_next)
            # Slide the window: drop oldest row, append the new forecast.
            current = jnp.vstack([current[1:], y_next])

        return jnp.array(forecasts)

    def predict(self, h=5, level=None, method="gaussian", B=100):
        """Generate prediction intervals with proper uncertainty propagation.

        Parameters
        ----------
        h : int, optional
            Number of steps to forecast, by default 5
        level : float, optional
            Confidence level in percent (e.g. 95) for prediction
            intervals; when None only the point forecast is returned.
        method : str, optional
            Method for prediction intervals ('gaussian' or 'bootstrap'),
            by default 'gaussian'
        B : int, optional
            Number of bootstrap samples, by default 100

        Returns
        -------
        array of shape (h, n_series) when `level` is None; otherwise a
        dict with keys 'mean', 'lower' and 'upper' (numpy arrays).
        """

        point_forecast = self._forecast(h)

        if level is None:
            return point_forecast

        # probabilistic prediction intervals
        # NOTE(review): if `method` is neither 'gaussian' nor 'bootstrap',
        # `lower`/`upper` are never bound and the return raises NameError.
        if method == "gaussian":
            # Use residual std with horizon-dependent scaling
            residual_std = np.std(self.residuals, axis=0)
            z = norm.ppf(1 - (1 - level / 100) / 2)

            # Scale uncertainty by sqrt(h) for each horizon
            horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
            std_expanded = residual_std * horizon_scale

            lower = point_forecast - z * std_expanded
            upper = point_forecast + z * std_expanded

        elif method == "bootstrap":
            # Proper residual bootstrap
            key = jax.random.PRNGKey(self.seed)
            n_residuals = len(self.residuals)
            sims = []

            for _ in range(B):
                # NOTE(review): `subkey` is split but never used — sampling
                # below goes through unseeded np.random, so bootstrap paths
                # are not reproducible from self.seed; confirm intent.
                key, subkey = jax.random.split(key)
                boot_indices = np.random.choice(
                    n_residuals, size=h, replace=True
                )
                boot_resids = self.residuals[boot_indices]

                current = self.last_obs.copy()
                path = []

                for t in range(h):
                    x_new = current.flatten()[None, :]
                    y_pred = self._predict_step(x_new)[0]
                    # Perturb each step with a resampled residual before
                    # feeding it back into the lag window.
                    y_sim = y_pred + boot_resids[t]
                    path.append(y_sim)
                    current = jnp.vstack([current[1:], y_sim])

                sims.append(jnp.array(path))

            sims = jnp.array(sims)
            lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
            upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)

        return {
            "mean": np.array(point_forecast),
            "lower": np.array(lower),
            "upper": np.array(upper),
        }
Vectorized Ridge2 RVFL for multivariate time series forecasting.
Parameters
lags : int, optional Number of lags to use for feature engineering, by default 1 nb_hidden : int, optional Number of hidden units, by default 5 activ : str, optional Activation function, by default 'relu' lambda_1 : float, optional Ridge regularization parameter for input features, by default 0.1 lambda_2 : float, optional Ridge regularization parameter for hidden units, by default 0.1 nodes_sim : str, optional Type of quasi-random sequence for weight initialization, by default 'sobol' seed : int, optional Random seed for reproducibility, by default 42
def fit(self, y):
    """Fit the Ridge2 model.

    Parameters
    ----------
    y : array-like of shape (n_samples,) or (n_samples, n_series)
        Target values.
    """
    y = jnp.array(y)
    # Promote univariate input to a single-column matrix so the
    # multivariate code path handles both cases.
    if y.ndim == 1:
        y = y[:, None]

    X, Y = self._create_lags(y)
    self.n_series = Y.shape[1]

    self.W = self._init_weights(X.shape[1])
    H = self._compute_hidden(X, self.W)

    (
        self.beta,
        self.gamma,
        self.Y_mean,
        self.X_mean,
        self.X_std,
        self.H_mean,
        self.H_std,
    ) = self._solve_ridge2(X, H, Y)

    # Compute residuals for prediction intervals
    X_s = (X - self.X_mean) / self.X_std
    H_s = (H - self.H_mean) / self.H_std
    fitted = X_s @ self.beta + H_s @ self.gamma + self.Y_mean
    self.residuals = np.array(Y - fitted)

    # Keep the last `lags` observations to seed recursive forecasting.
    self.last_obs = y[-self.lags:]
    return self
Fit the Ridge2 model.
Parameters
y : array-like of shape (n_samples,) Target values.
def predict(self, h=5, level=None, method="gaussian", B=100):
    """Generate prediction intervals with proper uncertainty propagation.

    Parameters
    ----------
    h : int, optional
        Number of steps to forecast, by default 5
    level : float, optional
        Confidence level in percent (e.g. 95) for prediction intervals;
        when None only the point forecast is returned.
    method : str, optional
        Method for prediction intervals ('gaussian' or 'bootstrap'),
        by default 'gaussian'
    B : int, optional
        Number of bootstrap samples, by default 100

    Returns
    -------
    array of shape (h, n_series) when `level` is None; otherwise a dict
    with keys 'mean', 'lower' and 'upper' (numpy arrays).
    """

    point_forecast = self._forecast(h)

    if level is None:
        return point_forecast

    # probabilistic prediction intervals
    # NOTE(review): if `method` is neither 'gaussian' nor 'bootstrap',
    # `lower`/`upper` are never bound and the return raises NameError.
    if method == "gaussian":
        # Use residual std with horizon-dependent scaling
        residual_std = np.std(self.residuals, axis=0)
        z = norm.ppf(1 - (1 - level / 100) / 2)

        # Scale uncertainty by sqrt(h) for each horizon
        horizon_scale = np.sqrt(np.arange(1, h + 1))[:, None]
        std_expanded = residual_std * horizon_scale

        lower = point_forecast - z * std_expanded
        upper = point_forecast + z * std_expanded

    elif method == "bootstrap":
        # Proper residual bootstrap
        key = jax.random.PRNGKey(self.seed)
        n_residuals = len(self.residuals)
        sims = []

        for _ in range(B):
            # NOTE(review): `subkey` is split but never used — sampling
            # below goes through unseeded np.random, so bootstrap paths
            # are not reproducible from self.seed; confirm intent.
            key, subkey = jax.random.split(key)
            boot_indices = np.random.choice(
                n_residuals, size=h, replace=True
            )
            boot_resids = self.residuals[boot_indices]

            current = self.last_obs.copy()
            path = []

            for t in range(h):
                x_new = current.flatten()[None, :]
                y_pred = self._predict_step(x_new)[0]
                # Perturb each step with a resampled residual before
                # feeding it back into the lag window.
                y_sim = y_pred + boot_resids[t]
                path.append(y_sim)
                current = jnp.vstack([current[1:], y_sim])

            sims.append(jnp.array(path))

        sims = jnp.array(sims)
        lower = jnp.percentile(sims, (100 - level) / 2, axis=0)
        upper = jnp.percentile(sims, 100 - (100 - level) / 2, axis=0)

    return {
        "mean": np.array(point_forecast),
        "lower": np.array(lower),
        "upper": np.array(upper),
    }
Generate prediction intervals with proper uncertainty propagation.
Parameters
h : int, optional Number of steps to forecast, by default 5 level : float, optional Confidence level for prediction intervals, by default None method : str, optional Method for prediction intervals ('gaussian' or 'bootstrap'), by default 'gaussian' B : int, optional Number of bootstrap samples, by default 100
Returns
point_forecast : array-like of shape (h,) Point forecasted values. lower : array-like of shape (h,) Lower bounds of prediction intervals. upper : array-like of shape (h,) Upper bounds of prediction intervals.
class SubSampler:
    """Subsampling class.

    Attributes:

        y: array-like, shape = [n_samples]
            Target values.

        row_sample: double
            subsampling fraction

        n_samples: int
            subsampling by using the number of rows (supersedes row_sample)

        seed: int
            reproducibility seed

        n_jobs: int
            number of jobs to run in parallel

        verbose: bool
            print progress messages and bars
    """

    def __init__(
        self,
        y,
        row_sample=0.8,
        n_samples=None,
        seed=123,
        n_jobs=None,
        verbose=False,
    ):
        self.y = y
        self.n_samples = n_samples
        if self.n_samples is not None:
            # An explicit sample size takes precedence over the fraction.
            assert self.n_samples < len(y), "'n_samples' must be < len(y)"
            self.row_sample = self.n_samples / len(y)
        else:
            assert (
                0 <= row_sample < 1
            ), "'row_sample' must be provided, plus < 1 and >= 0"
            self.row_sample = row_sample
        self.seed = seed
        self.indices = None  # populated by subsample()
        self.n_jobs = n_jobs
        self.verbose = verbose

    def subsample(self):
        """Returns indices of subsampled input data.

        Examples:

        <ul>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
        </ul>

        """
        # Delegate the actual index drawing to the shared helper.
        self.indices = dosubsample(
            y=self.y,
            row_sample=self.row_sample,
            seed=self.seed,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
        )
        return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int subsampling by using the number of rows (supersedes row_sample)
seed: int reproducibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
def subsample(self):
    """Returns indices of subsampled input data.

    Examples:

    <ul>
    <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
    <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
    </ul>

    """
    # Delegates to dosubsample with the parameters captured at
    # construction time; the drawn indices are also cached on self.
    self.indices = dosubsample(
        y=self.y,
        row_sample=self.row_sample,
        seed=self.seed,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
    )
    return self.indices
Returns indices of subsampled input data.
Examples: